acenturyandabit / code2dia Goto Github PK
View Code? Open in Web Editor NEWGenerate diagrams from code
Generate diagrams from code
A .pu and a .svg file are written to disk by code2dia/generator.py as part of the generation process.
Ideally, SVG generation should not perform any file I/O; failing that, writing these files should at least be optional.
from fastapi.responses import HTMLResponse, PlainTextResponse
import os
from pathlib import Path
from watchfiles import awatch, Change
import asyncio
import re
import code2dia
import logging
class Server:
    """Serve a live-reloading SVG viewer for one diagram definition file.

    The FastAPI application is exposed as ``self.app``. It serves an HTML
    viewer at ``/``, the SVG text at ``/diagram`` and a websocket at ``/ws``
    over which the text ``"reload"`` is sent after the definition file has
    been modified and the SVG regenerated.
    """

    def __init__(self, DIAGRAM_DEFINITION_FILE):
        """Build the app and register its routes.

        Args:
            DIAGRAM_DEFINITION_FILE: path of the Python file that defines the
                diagram; the SVG path is the same path with a trailing
                ``.py`` replaced by ``.svg``.
        """
        self.DIAGRAM_DEFINITION_FILE = DIAGRAM_DEFINITION_FILE
        self.DIAGRAM_OUTPUT_FILE = re.sub(r"\.py$", ".svg", DIAGRAM_DEFINITION_FILE)
        app = FastAPI()
        self.app = app
        self.websocket = None
        # asyncio keeps only weak references to tasks, so the watcher task is
        # stored here to keep it alive for the lifetime of the server.
        self._watchTask = None
        # TODO: The viewer should be able to zoom in and out.
        # TODO: Move this html string to an actual file
        # NOTE(review): the svg-pan-zoom <script> URL below looks mangled by
        # e-mail obfuscation ("[email protected]"); restore the intended
        # package@version before relying on it.
        html = f"""
<!DOCTYPE html>
<html>
<head>
<title>{DIAGRAM_DEFINITION_FILE}</title>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/svg-pan-zoom.min.js"></script>
</head>
<body style="margin:0">
<div id="container" style="width: 90vw; height: 90vh; border:1px solid black; ">
</div>
<script>
var ws = new WebSocket("ws://" + location.host + "/ws");
ws.onmessage = function(event) {{
if (event.data=="reload"){{
reload();
}}
}};
const reload = async ()=>{{
const diagramResponse = await fetch("/diagram");
const diagramText = await diagramResponse.text()
const containerEl = document.querySelector("#container");
containerEl.innerHTML = diagramText;
const svgElement = containerEl.children[0];
svgElement.style.cssText="display: inline; width: inherit; min-width: inherit; max-width: inherit; height: inherit; min-height: inherit; max-height: inherit;";
svgPanZoom(svgElement, {{
zoomEnabled: true,
controlIconsEnabled: true,
fit: true,
center: true
}});
}}
reload();
</script>
</body>
</html>
"""
        self.queue = asyncio.Queue()

        @app.on_event("startup")
        async def startup_event():
            self._watchTask = asyncio.create_task(self.watchFile())

        @app.get("/")
        async def get():
            return HTMLResponse(html)

        @app.get("/diagram")
        async def get():
            # Generate the SVG on first request if it is not on disk yet.
            if not os.path.exists(self.DIAGRAM_OUTPUT_FILE):
                await self.generateSVG()
            with open(self.DIAGRAM_OUTPUT_FILE) as f:
                diagramText = f.read()
            return PlainTextResponse(diagramText)

        @app.websocket("/ws")
        async def websocket_endpoint(websocket: WebSocket):
            await websocket.accept()
            self.websocket = websocket
            try:
                # Echo loop; it mainly keeps the connection open so that
                # watchFile() can push "reload" messages through it.
                while True:
                    data = await websocket.receive_text()
                    await websocket.send_text(data)
            finally:
                # Forget the socket once this handler exits (e.g. the client
                # disconnected) so watchFile() does not write to a dead
                # connection. A newer connection is left untouched.
                if self.websocket is websocket:
                    self.websocket = None

    async def generateSVG(self):
        """Run the definition file and write the resulting SVG to disk."""
        # TODO: Remove any file IO from SVG generation
        # A .pu and a .svg file are written to disk by code2dia/generator.py,
        # as part of the generation process.
        # Or we can at least make this optional.
        # Reload the file
        DIAGRAM_PLANTUML_FILE = re.sub(r"\.py$", ".pu", self.DIAGRAM_DEFINITION_FILE)
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        proc = await asyncio.create_subprocess_exec(
            "python", self.DIAGRAM_DEFINITION_FILE
        )
        await proc.communicate()
        content = await code2dia.generator.generateSVG(DIAGRAM_PLANTUML_FILE)
        with open(self.DIAGRAM_OUTPUT_FILE, "wb") as f:
            f.write(content)

    async def watchFile(self):
        """Regenerate the SVG and notify the viewer when the file is modified."""
        async for changes in awatch(self.DIAGRAM_DEFINITION_FILE):
            # One regeneration per batch is enough, however many
            # "modified" events the batch contains.
            if not any(change[0] == Change.modified for change in changes):
                continue
            await self.generateSVG()
            client = self.websocket
            if client is None:
                continue
            try:
                await client.send_text("reload")
            except Exception:
                # A broken client connection must not stop the watcher.
                logging.exception("Could not notify the viewer; dropping the websocket")
                if self.websocket is client:
                    self.websocket = None
"type": "isCallOf",
"from": fullCallName,
"to": functionName
})
import re
import os
# HEURISTIC: extensions
def getFilesAndFolders(path: str, extensions = (".cpp", ".c")):
    """Walk ``path`` and describe its folders and matching source files.

    Args:
        path: directory to walk with ``os.walk``.
        extensions: iterable of file-name suffixes; a file is recorded when
            its name ends with any of them.

    Returns:
        ``(objects, relations)``. ``objects`` holds ``{"type": "folder"}`` and
        ``{"type": "file"}`` dicts whose ``"name"`` is ``"root"`` followed by
        the path relative to ``path``. ``relations`` holds one
        ``{"type": "in", "from": child, "to": parent}`` dict per recorded
        folder or file.
    """
    # str.endswith accepts a tuple, so each file is tested once and can never
    # be recorded twice even if several suffixes match it.
    suffixes = tuple(extensions)
    objects = [{
        "type":"folder",
        "name": "root"
    }]
    relations = []
    for root, dirs, files in os.walk(path):
        subRoot = "root"+root[len(path) :]
        # HEURISTIC: ignore .git
        if ".git" in subRoot:
            continue
        for folder in dirs:
            fullFolderName = subRoot + os.path.sep + folder
            objects.append({
                "type":"folder",
                "name": fullFolderName
            })
            relations.append({
                "type": "in",
                "from": fullFolderName,
                "to": subRoot
            })
        for file in files:
            if not file.endswith(suffixes):
                continue
            fullFileName = subRoot + os.path.sep + file
            objects.append({
                "type":"file",
                "name": fullFileName
            })
            relations.append({
                "type": "in",
                "from": fullFileName,
                "to": subRoot
            })
    return objects, relations
def getFunctions(path, fileObjects, language):
    """Mine every ``"file"`` object in ``fileObjects`` for functions.

    Args:
        path: base directory handed to ``mineFile`` for each file.
        fileObjects: iterable of dicts; only those whose ``"type"`` is
            ``"file"`` are processed, using their ``"name"``.
        language: accepted but not used by this function.

    Returns:
        ``(objects, relations)`` as accumulated by ``mineFile``; ``objects``
        always starts with the ``"global"`` function definition.
    """
    objects = [{"type": "function_definition", "name": "global"}]
    relations = []
    for entry in fileObjects:
        if entry["type"] != "file":
            continue
        # Read each file
        objects, relations = mineFile(path, entry["name"], objects, relations)
    return objects, relations
def mineFile(basePath, filename, objects, relations):
    """Scan one source file character by character.

    Args:
        basePath: directory prefix; the file opened is ``basePath`` plus
            ``filename`` with its leading ``"root"`` removed.
        filename: ``"root"``-prefixed name of the file.
        objects: list that receives the ``filename + "_main"`` function
            definition and whatever ``stateReduceCharacter`` adds.
        relations: list that receives the matching ``"in"`` relation and
            whatever ``stateReduceCharacter`` adds.

    Returns:
        ``(objects, relations)`` after the whole file has been consumed.
    """
    mainName = filename + "_main"
    sourcePath = basePath + filename[len("root"):]
    with open(sourcePath) as f:
        objects.append({
            "type": "function_definition",
            "name": mainName
        })
        relations.append({
            "type": "in",
            "from": mainName,
            "to": filename
        })
        context = []
        for c in iter(lambda: f.read(1), ""):
            context, objects, relations = stateReduceCharacter(context, objects, relations, filename, c)
        # The scanner is also fed the empty string returned at end of file.
        context, objects, relations = stateReduceCharacter(context, objects, relations, filename, "")
    return objects, relations
# TODO: reduce number of arguments of this function
# Advance the heuristic scanner by one character.
#
# Arguments (as used below):
#   context   - list of string tokens used as a stack; the last entry is the
#               token currently being built ("" means nothing collected yet).
#   objects   - list of dicts; "function_definition" and "function_call"
#               entries are appended to it.
#   relations - list of dicts; "in" and "calls" entries are appended to it.
#   filename  - used as the "to" side of "in" relations and as the prefix of
#               the fallback caller name filename + "_main".
#   c         - the character to consume.
# Returns the tuple (context, objects, relations); context may be a new list.
#
# NOTE(review): this copy of the file has lost its indentation, so the nesting
# of the statements below cannot be confirmed from the text alone.
def stateReduceCharacter(context, objects, relations, filename, c):
try:
# Letters, digits, "_" and "-" extend the token on top of the stack.
if re.match("[a-zA-Z0-9_-]",c):
if len(context) == 0:
context.append("")
context[len(context)-1] += c
else:
if c == "(":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == ")":
# print ("pre")
# print (context)
# Cut the stack back to just before the most recent "(" token.
context = removeAfterHighestIndexOf("(", context)
# Python groups this condition as (A and B) or C.
if len (context) > 0 and context[-1] == "if" or context[-1] == "while":
context = context[:-1]
elif isIdentifier(context[-1]):
# This is either a function call or a function declaration, depending on
# whether we see a ; or a { next
# So flag it as a function candidate
context[-1] = "fn_candidate:" + context[-1]
# print (context)
elif c == "{":
if len (context) > 0 and context[-1] == "" and len(context) > 1:
context = context[:-1]
# upgrade previous fn candidate to real function
if context[-1].startswith("fn_candidate:"):
# print (context)
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_definition:" + functionName
# print (functionName)
objects.append({
"type": "function_definition",
"name": functionName
})
relations.append({
"type": "in",
"from": functionName,
"to": filename
})
else:
# print ("NOT A GOOD FN")
# print (context)
pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "}":
context = removeAfterHighestIndexOf("{", context)
while len(context) > 0 and (\
context[-1] in C_RESERVED_WORDS or \
re.match("fn_.+?:",context[-1])):
# while loop also removes function type specifiers
context = context[:-1]
elif c == "[":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "]":
context = removeAfterHighestIndexOf("[", context)
elif c == "'":
# TODO: Make escaping strings actually useful
# An odd number of quote tokens on the stack means a literal is open.
if context.count("'") % 2 == 1:
context = removeAfterHighestIndexOf("'", context)
else:
context.append("'")
elif c == '"':
# TODO: Make escaping strings actually useful
if context.count('"') % 2 == 1:
context = removeAfterHighestIndexOf('"', context)
else:
context.append('"')
elif c == ";":
# A pending candidate followed by ";" is recorded as a function call.
if len (context) > 0 and context[-1].startswith("fn_candidate:"):
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_call:" + functionName
callerName = filename+"_main"
# search up stack because we may be in an if-statement
for ctx in context:
if ctx.startswith("fn_definition:"):
callerName = ctx[len("fn_definition:"):]
break
fullCallName = callerName + "_call_" + functionName
objects.append({
"type": "function_call",
"name": fullCallName
})
relations.append({
"type": "calls",
"from": callerName,
"to": fullCallName
})
# relations.append({
# "type": "isCallOf",
# "from": fullCallName,
# "to": functionName
# })
# TODO: Temporal context; callsAfter
context = trimSemicolon(context)
elif c == "/":
# NOTE(review): presumably meant to fold two consecutive "/" characters
# into a single "//" marker token; the nesting of the two if/else pairs
# decides what a lone "/" does - confirm against the indented original.
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
if len(context) > 0 and context[-1] == "/":
context[-1] = "//"
else:
context.append("/")
elif c in ["+","-","*", "&", "|", "!", "<" , ">", "?", "="]:
# Operators get a free pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif str.isspace(c):
# TODO: clean up len(context) == 0 chekcs.
if c == "\r" or c == "\n":
# pop off any line comments and any processor directives
context = removeAfterLowestIndexOf("//", context)
context = removeAfterLowestIndexOf("#", context)
if len(context) == 0:
context.append("")
if context[-1]!= "":
context.append("")
except IndexError as e:
# print (context)
# Re-raised unchanged; the handler only hosts the debug print above.
raise e
return context, objects, relations
def removeAfterLowestIndexOf(item, context):
    """Cut ``context`` just before the first occurrence of ``item``.

    When ``item`` is absent the same list object is returned. The result is
    never empty: an empty list gets a single ``""`` appended.
    """
    if item in context:
        context = context[:context.index(item)]
    if not context:
        context.append("")
    return context
def removeAfterHighestIndexOf(item, context):
    """Cut ``context`` just before the last occurrence of ``item``.

    Every index is inspected, including index 0, so a marker sitting at the
    very bottom of the stack is found as well. When ``item`` is absent the
    same list object is returned. The result is never empty: an empty list
    gets a single ``""`` appended.
    """
    # Scan from the top of the stack down to and including index 0.
    for i in range(len(context) - 1, -1, -1):
        if context[i] == item:
            context = context[:i]
            break
    if not context:
        context.append("")
    return context
# Reserved words: tokens in this list are never accepted by isIdentifier and
# are popped off the scanner stack when a "}" is processed.
C_RESERVED_WORDS = [
    "auto", "_Packed", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern", "float", "for",
    "goto", "if", "int", "long", "register", "return", "short", "signed",
    "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned",
    "void", "volatile", "while",
]
def isIdentifier(identifier: str):
    """Return True when ``identifier`` looks like a non-reserved name.

    A name is a letter or underscore followed by any number of word
    characters, so one-character names such as ``f`` qualify while tokens
    that start with a digit do not. Names listed in ``C_RESERVED_WORDS`` are
    rejected.
    """
    # [^\W\d] is "a word character that is not a digit": letter or underscore.
    if re.fullmatch(r"[^\W\d]\w*", identifier) is None:
        return False
    return identifier not in C_RESERVED_WORDS
def trimSemicolon(context):
    """Return the part of ``context`` that survives the end of a statement.

    Everything from the last ``"{"`` token onwards is dropped, and that
    ``"{"`` is dropped too. Index 0 is not inspected; when no ``"{"`` is
    found the whole stack is discarded. A new list is always returned and it
    is never empty (``[""]`` at minimum).
    """
    keep = 0  # willing to delete everything
    for idx in reversed(range(1, len(context))):
        if context[idx] == "{":
            keep = idx
            break
    trimmed = context[:keep]
    return trimmed if trimmed else [""]
import re
import os
# HEURISTIC: extensions
def getFilesAndFolders(path: str, extensions = (".cpp", ".c")):
    """Walk ``path`` and describe its folders and matching source files.

    Args:
        path: directory to walk with ``os.walk``.
        extensions: iterable of file-name suffixes; a file is recorded when
            its name ends with any of them.

    Returns:
        ``(objects, relations)``. ``objects`` holds ``{"type": "folder"}`` and
        ``{"type": "file"}`` dicts whose ``"name"`` is ``"root"`` followed by
        the path relative to ``path``. ``relations`` holds one
        ``{"type": "in", "from": child, "to": parent}`` dict per recorded
        folder or file.
    """
    # str.endswith accepts a tuple, so each file is tested once and can never
    # be recorded twice even if several suffixes match it.
    suffixes = tuple(extensions)
    objects = [{
        "type":"folder",
        "name": "root"
    }]
    relations = []
    for root, dirs, files in os.walk(path):
        subRoot = "root"+root[len(path) :]
        # HEURISTIC: ignore .git
        if ".git" in subRoot:
            continue
        for folder in dirs:
            fullFolderName = subRoot + os.path.sep + folder
            objects.append({
                "type":"folder",
                "name": fullFolderName
            })
            relations.append({
                "type": "in",
                "from": fullFolderName,
                "to": subRoot
            })
        for file in files:
            if not file.endswith(suffixes):
                continue
            fullFileName = subRoot + os.path.sep + file
            objects.append({
                "type":"file",
                "name": fullFileName
            })
            relations.append({
                "type": "in",
                "from": fullFileName,
                "to": subRoot
            })
    return objects, relations
def getFunctions(path, fileObjects, language):
    """Mine every ``"file"`` object in ``fileObjects`` for functions.

    Args:
        path: base directory handed to ``mineFile`` for each file.
        fileObjects: iterable of dicts; only those whose ``"type"`` is
            ``"file"`` are processed, using their ``"name"``.
        language: accepted but not used by this function.

    Returns:
        ``(objects, relations)`` as accumulated by ``mineFile``; ``objects``
        always starts with the ``"global"`` function definition.
    """
    objects = [{"type": "function_definition", "name": "global"}]
    relations = []
    for entry in fileObjects:
        if entry["type"] != "file":
            continue
        # Read each file
        objects, relations = mineFile(path, entry["name"], objects, relations)
    return objects, relations
def mineFile(basePath, filename, objects, relations):
    """Scan one source file character by character.

    Args:
        basePath: directory prefix; the file opened is ``basePath`` plus
            ``filename`` with its leading ``"root"`` removed.
        filename: ``"root"``-prefixed name of the file.
        objects: list that receives the ``filename + "_main"`` function
            definition and whatever ``stateReduceCharacter`` adds.
        relations: list that receives the matching ``"in"`` relation and
            whatever ``stateReduceCharacter`` adds.

    Returns:
        ``(objects, relations)`` after the whole file has been consumed.
    """
    mainName = filename + "_main"
    sourcePath = basePath + filename[len("root"):]
    with open(sourcePath) as f:
        objects.append({
            "type": "function_definition",
            "name": mainName
        })
        relations.append({
            "type": "in",
            "from": mainName,
            "to": filename
        })
        context = []
        for c in iter(lambda: f.read(1), ""):
            context, objects, relations = stateReduceCharacter(context, objects, relations, filename, c)
        # The scanner is also fed the empty string returned at end of file.
        context, objects, relations = stateReduceCharacter(context, objects, relations, filename, "")
    return objects, relations
# TODO: reduce number of arguments of this function
# Advance the heuristic scanner by one character.
#
# Arguments (as used below):
#   context   - list of string tokens used as a stack; the last entry is the
#               token currently being built ("" means nothing collected yet).
#   objects   - list of dicts; "function_definition" and "function_call"
#               entries are appended to it.
#   relations - list of dicts; "in" and "calls" entries are appended to it.
#   filename  - used as the "to" side of "in" relations and as the prefix of
#               the fallback caller name filename + "_main".
#   c         - the character to consume.
# Returns the tuple (context, objects, relations); context may be a new list.
#
# NOTE(review): this copy of the file has lost its indentation, so the nesting
# of the statements below cannot be confirmed from the text alone.
def stateReduceCharacter(context, objects, relations, filename, c):
try:
# Letters, digits, "_" and "-" extend the token on top of the stack.
if re.match("[a-zA-Z0-9_-]",c):
if len(context) == 0:
context.append("")
context[len(context)-1] += c
else:
if c == "(":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == ")":
# print ("pre")
# print (context)
# Cut the stack back to just before the most recent "(" token.
context = removeAfterHighestIndexOf("(", context)
# Python groups this condition as (A and B) or C.
if len (context) > 0 and context[-1] == "if" or context[-1] == "while":
context = context[:-1]
elif isIdentifier(context[-1]):
# This is either a function call or a function declaration, depending on
# whether we see a ; or a { next
# So flag it as a function candidate
context[-1] = "fn_candidate:" + context[-1]
# print (context)
elif c == "{":
if len (context) > 0 and context[-1] == "" and len(context) > 1:
context = context[:-1]
# upgrade previous fn candidate to real function
if context[-1].startswith("fn_candidate:"):
# print (context)
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_definition:" + functionName
# print (functionName)
objects.append({
"type": "function_definition",
"name": functionName
})
relations.append({
"type": "in",
"from": functionName,
"to": filename
})
else:
# print ("NOT A GOOD FN")
# print (context)
pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "}":
context = removeAfterHighestIndexOf("{", context)
while len(context) > 0 and (\
context[-1] in C_RESERVED_WORDS or \
re.match("fn_.+?:",context[-1])):
# while loop also removes function type specifiers
context = context[:-1]
elif c == "[":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "]":
context = removeAfterHighestIndexOf("[", context)
elif c == "'":
# TODO: Make escaping strings actually useful
# An odd number of quote tokens on the stack means a literal is open.
if context.count("'") % 2 == 1:
context = removeAfterHighestIndexOf("'", context)
else:
context.append("'")
elif c == '"':
# TODO: Make escaping strings actually useful
if context.count('"') % 2 == 1:
context = removeAfterHighestIndexOf('"', context)
else:
context.append('"')
elif c == ";":
# A pending candidate followed by ";" is recorded as a function call.
if len (context) > 0 and context[-1].startswith("fn_candidate:"):
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_call:" + functionName
callerName = filename+"_main"
# search up stack because we may be in an if-statement
for ctx in context:
if ctx.startswith("fn_definition:"):
callerName = ctx[len("fn_definition:"):]
break
fullCallName = callerName + "_call_" + functionName
objects.append({
"type": "function_call",
"name": fullCallName
})
relations.append({
"type": "calls",
"from": callerName,
"to": fullCallName
})
# relations.append({
# "type": "isCallOf",
# "from": fullCallName,
# "to": functionName
# })
# TODO: Temporal context; callsAfter
context = trimSemicolon(context)
elif c == "/":
# NOTE(review): presumably meant to fold two consecutive "/" characters
# into a single "//" marker token; the nesting of the two if/else pairs
# decides what a lone "/" does - confirm against the indented original.
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
if len(context) > 0 and context[-1] == "/":
context[-1] = "//"
else:
context.append("/")
elif c in ["+","-","*", "&", "|", "!", "<" , ">", "?", "="]:
# Operators get a free pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif str.isspace(c):
# TODO: clean up len(context) == 0 chekcs.
if c == "\r" or c == "\n":
# pop off any line comments and any processor directives
context = removeAfterLowestIndexOf("//", context)
context = removeAfterLowestIndexOf("#", context)
if len(context) == 0:
context.append("")
if context[-1]!= "":
context.append("")
except IndexError as e:
# print (context)
# Re-raised unchanged; the handler only hosts the debug print above.
raise e
return context, objects, relations
def removeAfterLowestIndexOf(item, context):
    """Cut ``context`` just before the first occurrence of ``item``.

    When ``item`` is absent the same list object is returned. The result is
    never empty: an empty list gets a single ``""`` appended.
    """
    if item in context:
        context = context[:context.index(item)]
    if not context:
        context.append("")
    return context
def removeAfterHighestIndexOf(item, context):
    """Cut ``context`` just before the last occurrence of ``item``.

    Every index is inspected, including index 0, so a marker sitting at the
    very bottom of the stack is found as well. When ``item`` is absent the
    same list object is returned. The result is never empty: an empty list
    gets a single ``""`` appended.
    """
    # Scan from the top of the stack down to and including index 0.
    for i in range(len(context) - 1, -1, -1):
        if context[i] == item:
            context = context[:i]
            break
    if not context:
        context.append("")
    return context
# Reserved words: tokens in this list are never accepted by isIdentifier and
# are popped off the scanner stack when a "}" is processed.
C_RESERVED_WORDS = [
    "auto", "_Packed", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern", "float", "for",
    "goto", "if", "int", "long", "register", "return", "short", "signed",
    "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned",
    "void", "volatile", "while",
]
def isIdentifier(identifier: str):
    """Return True when ``identifier`` looks like a non-reserved name.

    A name is a letter or underscore followed by any number of word
    characters, so one-character names such as ``f`` qualify while tokens
    that start with a digit do not. Names listed in ``C_RESERVED_WORDS`` are
    rejected.
    """
    # [^\W\d] is "a word character that is not a digit": letter or underscore.
    if re.fullmatch(r"[^\W\d]\w*", identifier) is None:
        return False
    return identifier not in C_RESERVED_WORDS
def trimSemicolon(context):
    """Return the part of ``context`` that survives the end of a statement.

    Everything from the last ``"{"`` token onwards is dropped, and that
    ``"{"`` is dropped too. Index 0 is not inspected; when no ``"{"`` is
    found the whole stack is discarded. A new list is always returned and it
    is never empty (``[""]`` at minimum).
    """
    keep = 0  # willing to delete everything
    for idx in reversed(range(1, len(context))):
        if context[idx] == "{":
            keep = idx
            break
    trimmed = context[:keep]
    return trimmed if trimmed else [""]
import re
import os
# HEURISTIC: extensions
def getFilesAndFolders(path: str, extensions = (".cpp", ".c")):
    """Walk ``path`` and describe its folders and matching source files.

    Args:
        path: directory to walk with ``os.walk``.
        extensions: iterable of file-name suffixes; a file is recorded when
            its name ends with any of them.

    Returns:
        ``(objects, relations)``. ``objects`` holds ``{"type": "folder"}`` and
        ``{"type": "file"}`` dicts whose ``"name"`` is ``"root"`` followed by
        the path relative to ``path``. ``relations`` holds one
        ``{"type": "in", "from": child, "to": parent}`` dict per recorded
        folder or file.
    """
    # str.endswith accepts a tuple, so each file is tested once and can never
    # be recorded twice even if several suffixes match it.
    suffixes = tuple(extensions)
    objects = [{
        "type":"folder",
        "name": "root"
    }]
    relations = []
    for root, dirs, files in os.walk(path):
        subRoot = "root"+root[len(path) :]
        # HEURISTIC: ignore .git
        if ".git" in subRoot:
            continue
        for folder in dirs:
            fullFolderName = subRoot + os.path.sep + folder
            objects.append({
                "type":"folder",
                "name": fullFolderName
            })
            relations.append({
                "type": "in",
                "from": fullFolderName,
                "to": subRoot
            })
        for file in files:
            if not file.endswith(suffixes):
                continue
            fullFileName = subRoot + os.path.sep + file
            objects.append({
                "type":"file",
                "name": fullFileName
            })
            relations.append({
                "type": "in",
                "from": fullFileName,
                "to": subRoot
            })
    return objects, relations
def getFunctions(path, fileObjects, language):
    """Mine every ``"file"`` object in ``fileObjects`` for functions.

    Args:
        path: base directory handed to ``mineFile`` for each file.
        fileObjects: iterable of dicts; only those whose ``"type"`` is
            ``"file"`` are processed, using their ``"name"``.
        language: accepted but not used by this function.

    Returns:
        ``(objects, relations)`` as accumulated by ``mineFile``; ``objects``
        always starts with the ``"global"`` function definition.
    """
    objects = [{"type": "function_definition", "name": "global"}]
    relations = []
    for entry in fileObjects:
        if entry["type"] != "file":
            continue
        # Read each file
        objects, relations = mineFile(path, entry["name"], objects, relations)
    return objects, relations
def mineFile(basePath, filename, objects, relations):
    """Scan one source file character by character.

    Args:
        basePath: directory prefix; the file opened is ``basePath`` plus
            ``filename`` with its leading ``"root"`` removed.
        filename: ``"root"``-prefixed name of the file.
        objects: list that receives the ``filename + "_main"`` function
            definition and whatever ``stateReduceCharacter`` adds.
        relations: list that receives the matching ``"in"`` relation and
            whatever ``stateReduceCharacter`` adds.

    Returns:
        ``(objects, relations)`` after the whole file has been consumed.
    """
    mainName = filename + "_main"
    sourcePath = basePath + filename[len("root"):]
    with open(sourcePath) as f:
        objects.append({
            "type": "function_definition",
            "name": mainName
        })
        relations.append({
            "type": "in",
            "from": mainName,
            "to": filename
        })
        context = []
        for c in iter(lambda: f.read(1), ""):
            context, objects, relations = stateReduceCharacter(context, objects, relations, filename, c)
        # The scanner is also fed the empty string returned at end of file.
        context, objects, relations = stateReduceCharacter(context, objects, relations, filename, "")
    return objects, relations
# TODO: reduce number of arguments of this function
# Advance the heuristic scanner by one character.
#
# Arguments (as used below):
#   context   - list of string tokens used as a stack; the last entry is the
#               token currently being built ("" means nothing collected yet).
#   objects   - list of dicts; "function_definition" and "function_call"
#               entries are appended to it.
#   relations - list of dicts; "in" and "calls" entries are appended to it.
#   filename  - used as the "to" side of "in" relations and as the prefix of
#               the fallback caller name filename + "_main".
#   c         - the character to consume.
# Returns the tuple (context, objects, relations); context may be a new list.
#
# NOTE(review): this copy of the file has lost its indentation, so the nesting
# of the statements below cannot be confirmed from the text alone.
def stateReduceCharacter(context, objects, relations, filename, c):
try:
# Letters, digits, "_" and "-" extend the token on top of the stack.
if re.match("[a-zA-Z0-9_-]",c):
if len(context) == 0:
context.append("")
context[len(context)-1] += c
else:
if c == "(":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == ")":
# print ("pre")
# print (context)
# Cut the stack back to just before the most recent "(" token.
context = removeAfterHighestIndexOf("(", context)
# Python groups this condition as (A and B) or C.
if len (context) > 0 and context[-1] == "if" or context[-1] == "while":
context = context[:-1]
elif isIdentifier(context[-1]):
# This is either a function call or a function declaration, depending on
# whether we see a ; or a { next
# So flag it as a function candidate
context[-1] = "fn_candidate:" + context[-1]
# print (context)
elif c == "{":
if len (context) > 0 and context[-1] == "" and len(context) > 1:
context = context[:-1]
# upgrade previous fn candidate to real function
if context[-1].startswith("fn_candidate:"):
# print (context)
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_definition:" + functionName
# print (functionName)
objects.append({
"type": "function_definition",
"name": functionName
})
relations.append({
"type": "in",
"from": functionName,
"to": filename
})
else:
# print ("NOT A GOOD FN")
# print (context)
pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "}":
context = removeAfterHighestIndexOf("{", context)
while len(context) > 0 and (\
context[-1] in C_RESERVED_WORDS or \
re.match("fn_.+?:",context[-1])):
# while loop also removes function type specifiers
context = context[:-1]
elif c == "[":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "]":
context = removeAfterHighestIndexOf("[", context)
elif c == "'":
# TODO: Make escaping strings actually useful
# An odd number of quote tokens on the stack means a literal is open.
if context.count("'") % 2 == 1:
context = removeAfterHighestIndexOf("'", context)
else:
context.append("'")
elif c == '"':
# TODO: Make escaping strings actually useful
if context.count('"') % 2 == 1:
context = removeAfterHighestIndexOf('"', context)
else:
context.append('"')
elif c == ";":
# A pending candidate followed by ";" is recorded as a function call.
if len (context) > 0 and context[-1].startswith("fn_candidate:"):
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_call:" + functionName
callerName = filename+"_main"
# search up stack because we may be in an if-statement
for ctx in context:
if ctx.startswith("fn_definition:"):
callerName = ctx[len("fn_definition:"):]
break
fullCallName = callerName + "_call_" + functionName
objects.append({
"type": "function_call",
"name": fullCallName
})
relations.append({
"type": "calls",
"from": callerName,
"to": fullCallName
})
# relations.append({
# "type": "isCallOf",
# "from": fullCallName,
# "to": functionName
# })
# TODO: Temporal context; callsAfter
context = trimSemicolon(context)
elif c == "/":
# NOTE(review): presumably meant to fold two consecutive "/" characters
# into a single "//" marker token; the nesting of the two if/else pairs
# decides what a lone "/" does - confirm against the indented original.
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
if len(context) > 0 and context[-1] == "/":
context[-1] = "//"
else:
context.append("/")
elif c in ["+","-","*", "&", "|", "!", "<" , ">", "?", "="]:
# Operators get a free pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif str.isspace(c):
# TODO: clean up len(context) == 0 chekcs.
if c == "\r" or c == "\n":
# pop off any line comments and any processor directives
context = removeAfterLowestIndexOf("//", context)
context = removeAfterLowestIndexOf("#", context)
if len(context) == 0:
context.append("")
if context[-1]!= "":
context.append("")
except IndexError as e:
# print (context)
# Re-raised unchanged; the handler only hosts the debug print above.
raise e
return context, objects, relations
def removeAfterLowestIndexOf(item, context):
    """Cut ``context`` just before the first occurrence of ``item``.

    When ``item`` is absent the same list object is returned. The result is
    never empty: an empty list gets a single ``""`` appended.
    """
    if item in context:
        context = context[:context.index(item)]
    if not context:
        context.append("")
    return context
def removeAfterHighestIndexOf(item, context):
    """Cut ``context`` just before the last occurrence of ``item``.

    Every index is inspected, including index 0, so a marker sitting at the
    very bottom of the stack is found as well. When ``item`` is absent the
    same list object is returned. The result is never empty: an empty list
    gets a single ``""`` appended.
    """
    # Scan from the top of the stack down to and including index 0.
    for i in range(len(context) - 1, -1, -1):
        if context[i] == item:
            context = context[:i]
            break
    if not context:
        context.append("")
    return context
# Reserved words: tokens in this list are never accepted by isIdentifier and
# are popped off the scanner stack when a "}" is processed.
C_RESERVED_WORDS = [
    "auto", "_Packed", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern", "float", "for",
    "goto", "if", "int", "long", "register", "return", "short", "signed",
    "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned",
    "void", "volatile", "while",
]
def isIdentifier(identifier: str):
    """Return True when ``identifier`` looks like a non-reserved name.

    A name is a letter or underscore followed by any number of word
    characters, so one-character names such as ``f`` qualify while tokens
    that start with a digit do not. Names listed in ``C_RESERVED_WORDS`` are
    rejected.
    """
    # [^\W\d] is "a word character that is not a digit": letter or underscore.
    if re.fullmatch(r"[^\W\d]\w*", identifier) is None:
        return False
    return identifier not in C_RESERVED_WORDS
def trimSemicolon(context):
    """Return the part of ``context`` that survives the end of a statement.

    Everything from the last ``"{"`` token onwards is dropped, and that
    ``"{"`` is dropped too. Index 0 is not inspected; when no ``"{"`` is
    found the whole stack is discarded. A new list is always returned and it
    is never empty (``[""]`` at minimum).
    """
    keep = 0  # willing to delete everything
    for idx in reversed(range(1, len(context))):
        if context[idx] == "{":
            keep = idx
            break
    trimmed = context[:keep]
    return trimmed if trimmed else [""]
import re
import os
# HEURISTIC: extensions
def getFilesAndFolders(path: str, extensions = (".cpp", ".c")):
    """Walk ``path`` and describe its folders and matching source files.

    Args:
        path: directory to walk with ``os.walk``.
        extensions: iterable of file-name suffixes; a file is recorded when
            its name ends with any of them.

    Returns:
        ``(objects, relations)``. ``objects`` holds ``{"type": "folder"}`` and
        ``{"type": "file"}`` dicts whose ``"name"`` is ``"root"`` followed by
        the path relative to ``path``. ``relations`` holds one
        ``{"type": "in", "from": child, "to": parent}`` dict per recorded
        folder or file.
    """
    # str.endswith accepts a tuple, so each file is tested once and can never
    # be recorded twice even if several suffixes match it.
    suffixes = tuple(extensions)
    objects = [{
        "type":"folder",
        "name": "root"
    }]
    relations = []
    for root, dirs, files in os.walk(path):
        subRoot = "root"+root[len(path) :]
        # HEURISTIC: ignore .git
        if ".git" in subRoot:
            continue
        for folder in dirs:
            fullFolderName = subRoot + os.path.sep + folder
            objects.append({
                "type":"folder",
                "name": fullFolderName
            })
            relations.append({
                "type": "in",
                "from": fullFolderName,
                "to": subRoot
            })
        for file in files:
            if not file.endswith(suffixes):
                continue
            fullFileName = subRoot + os.path.sep + file
            objects.append({
                "type":"file",
                "name": fullFileName
            })
            relations.append({
                "type": "in",
                "from": fullFileName,
                "to": subRoot
            })
    return objects, relations
def getFunctions(path, fileObjects, language):
    """Mine every ``"file"`` object in ``fileObjects`` for functions.

    Args:
        path: base directory handed to ``mineFile`` for each file.
        fileObjects: iterable of dicts; only those whose ``"type"`` is
            ``"file"`` are processed, using their ``"name"``.
        language: accepted but not used by this function.

    Returns:
        ``(objects, relations)`` as accumulated by ``mineFile``; ``objects``
        always starts with the ``"global"`` function definition.
    """
    objects = [{"type": "function_definition", "name": "global"}]
    relations = []
    for entry in fileObjects:
        if entry["type"] != "file":
            continue
        # Read each file
        objects, relations = mineFile(path, entry["name"], objects, relations)
    return objects, relations
def mineFile(basePath, filename, objects, relations):
    """Scan one source file character by character.

    Args:
        basePath: directory prefix; the file opened is ``basePath`` plus
            ``filename`` with its leading ``"root"`` removed.
        filename: ``"root"``-prefixed name of the file.
        objects: list that receives the ``filename + "_main"`` function
            definition and whatever ``stateReduceCharacter`` adds.
        relations: list that receives the matching ``"in"`` relation and
            whatever ``stateReduceCharacter`` adds.

    Returns:
        ``(objects, relations)`` after the whole file has been consumed.
    """
    mainName = filename + "_main"
    sourcePath = basePath + filename[len("root"):]
    with open(sourcePath) as f:
        objects.append({
            "type": "function_definition",
            "name": mainName
        })
        relations.append({
            "type": "in",
            "from": mainName,
            "to": filename
        })
        context = []
        for c in iter(lambda: f.read(1), ""):
            context, objects, relations = stateReduceCharacter(context, objects, relations, filename, c)
        # The scanner is also fed the empty string returned at end of file.
        context, objects, relations = stateReduceCharacter(context, objects, relations, filename, "")
    return objects, relations
# TODO: reduce number of arguments of this function
# Advance the heuristic scanner by one character.
#
# Arguments (as used below):
#   context   - list of string tokens used as a stack; the last entry is the
#               token currently being built ("" means nothing collected yet).
#   objects   - list of dicts; "function_definition" and "function_call"
#               entries are appended to it.
#   relations - list of dicts; "in" and "calls" entries are appended to it.
#   filename  - used as the "to" side of "in" relations and as the prefix of
#               the fallback caller name filename + "_main".
#   c         - the character to consume.
# Returns the tuple (context, objects, relations); context may be a new list.
#
# NOTE(review): this copy of the file has lost its indentation, so the nesting
# of the statements below cannot be confirmed from the text alone.
def stateReduceCharacter(context, objects, relations, filename, c):
try:
# Letters, digits, "_" and "-" extend the token on top of the stack.
if re.match("[a-zA-Z0-9_-]",c):
if len(context) == 0:
context.append("")
context[len(context)-1] += c
else:
if c == "(":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == ")":
# print ("pre")
# print (context)
# Cut the stack back to just before the most recent "(" token.
context = removeAfterHighestIndexOf("(", context)
# Python groups this condition as (A and B) or C.
if len (context) > 0 and context[-1] == "if" or context[-1] == "while":
context = context[:-1]
elif isIdentifier(context[-1]):
# This is either a function call or a function declaration, depending on
# whether we see a ; or a { next
# So flag it as a function candidate
context[-1] = "fn_candidate:" + context[-1]
# print (context)
elif c == "{":
if len (context) > 0 and context[-1] == "" and len(context) > 1:
context = context[:-1]
# upgrade previous fn candidate to real function
if context[-1].startswith("fn_candidate:"):
# print (context)
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_definition:" + functionName
# print (functionName)
objects.append({
"type": "function_definition",
"name": functionName
})
relations.append({
"type": "in",
"from": functionName,
"to": filename
})
else:
# print ("NOT A GOOD FN")
# print (context)
pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "}":
context = removeAfterHighestIndexOf("{", context)
while len(context) > 0 and (\
context[-1] in C_RESERVED_WORDS or \
re.match("fn_.+?:",context[-1])):
# while loop also removes function type specifiers
context = context[:-1]
elif c == "[":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "]":
context = removeAfterHighestIndexOf("[", context)
elif c == "'":
# TODO: Make escaping strings actually useful
# An odd number of quote tokens on the stack means a literal is open.
if context.count("'") % 2 == 1:
context = removeAfterHighestIndexOf("'", context)
else:
context.append("'")
elif c == '"':
# TODO: Make escaping strings actually useful
if context.count('"') % 2 == 1:
context = removeAfterHighestIndexOf('"', context)
else:
context.append('"')
elif c == ";":
# A pending candidate followed by ";" is recorded as a function call.
if len (context) > 0 and context[-1].startswith("fn_candidate:"):
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_call:" + functionName
callerName = filename+"_main"
# search up stack because we may be in an if-statement
for ctx in context:
if ctx.startswith("fn_definition:"):
callerName = ctx[len("fn_definition:"):]
break
fullCallName = callerName + "_call_" + functionName
objects.append({
"type": "function_call",
"name": fullCallName
})
relations.append({
"type": "calls",
"from": callerName,
"to": fullCallName
})
# relations.append({
# "type": "isCallOf",
# "from": fullCallName,
# "to": functionName
# })
# TODO: Temporal context; callsAfter
context = trimSemicolon(context)
elif c == "/":
# NOTE(review): presumably meant to fold two consecutive "/" characters
# into a single "//" marker token; the nesting of the two if/else pairs
# decides what a lone "/" does - confirm against the indented original.
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
if len(context) > 0 and context[-1] == "/":
context[-1] = "//"
else:
context.append("/")
elif c in ["+","-","*", "&", "|", "!", "<" , ">", "?", "="]:
# Operators get a free pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif str.isspace(c):
# TODO: clean up len(context) == 0 chekcs.
if c == "\r" or c == "\n":
# pop off any line comments and any processor directives
context = removeAfterLowestIndexOf("//", context)
context = removeAfterLowestIndexOf("#", context)
if len(context) == 0:
context.append("")
if context[-1]!= "":
context.append("")
except IndexError as e:
# print (context)
# Re-raised unchanged; the handler only hosts the debug print above.
raise e
return context, objects, relations
def removeAfterLowestIndexOf(item, context):
    """Cut ``context`` just before the first occurrence of ``item``.

    When ``item`` is absent the same list object is returned. The result is
    never empty: an empty list gets a single ``""`` appended.
    """
    if item in context:
        context = context[:context.index(item)]
    if not context:
        context.append("")
    return context
def removeAfterHighestIndexOf(item, context):
    """Cut ``context`` just before the last occurrence of ``item``.

    Every index is inspected, including index 0, so a marker sitting at the
    very bottom of the stack is found as well. When ``item`` is absent the
    same list object is returned. The result is never empty: an empty list
    gets a single ``""`` appended.
    """
    # Scan from the top of the stack down to and including index 0.
    for i in range(len(context) - 1, -1, -1):
        if context[i] == item:
            context = context[:i]
            break
    if not context:
        context.append("")
    return context
# Reserved words: tokens in this list are never accepted by isIdentifier and
# are popped off the scanner stack when a "}" is processed.
C_RESERVED_WORDS = [
    "auto", "_Packed", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern", "float", "for",
    "goto", "if", "int", "long", "register", "return", "short", "signed",
    "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned",
    "void", "volatile", "while",
]
def isIdentifier(identifier: str):
    """Return True when ``identifier`` looks like a non-reserved name.

    A name is a letter or underscore followed by any number of word
    characters, so one-character names such as ``f`` qualify while tokens
    that start with a digit do not. Names listed in ``C_RESERVED_WORDS`` are
    rejected.
    """
    # [^\W\d] is "a word character that is not a digit": letter or underscore.
    if re.fullmatch(r"[^\W\d]\w*", identifier) is None:
        return False
    return identifier not in C_RESERVED_WORDS
def trimSemicolon(context):
    """Return the part of ``context`` that survives the end of a statement.

    Everything from the last ``"{"`` token onwards is dropped, and that
    ``"{"`` is dropped too. Index 0 is not inspected; when no ``"{"`` is
    found the whole stack is discarded. A new list is always returned and it
    is never empty (``[""]`` at minimum).
    """
    keep = 0  # willing to delete everything
    for idx in reversed(range(1, len(context))):
        if context[idx] == "{":
            keep = idx
            break
    trimmed = context[:keep]
    return trimmed if trimmed else [""]
print (context)
whether we see a ; or a { next
So flag it as a function candidate
print (context)
import re
import os
# HEURISTIC: extensions
def getFilesAndFolders(path: str, extensions = (".cpp", ".c")):
    """Walk ``path`` and describe its folders and matching source files.

    Args:
        path: directory to walk with ``os.walk``.
        extensions: iterable of file-name suffixes; a file is recorded when
            its name ends with any of them.

    Returns:
        ``(objects, relations)``. ``objects`` holds ``{"type": "folder"}`` and
        ``{"type": "file"}`` dicts whose ``"name"`` is ``"root"`` followed by
        the path relative to ``path``. ``relations`` holds one
        ``{"type": "in", "from": child, "to": parent}`` dict per recorded
        folder or file.
    """
    # str.endswith accepts a tuple, so each file is tested once and can never
    # be recorded twice even if several suffixes match it.
    suffixes = tuple(extensions)
    objects = [{
        "type":"folder",
        "name": "root"
    }]
    relations = []
    for root, dirs, files in os.walk(path):
        subRoot = "root"+root[len(path) :]
        # HEURISTIC: ignore .git
        if ".git" in subRoot:
            continue
        for folder in dirs:
            fullFolderName = subRoot + os.path.sep + folder
            objects.append({
                "type":"folder",
                "name": fullFolderName
            })
            relations.append({
                "type": "in",
                "from": fullFolderName,
                "to": subRoot
            })
        for file in files:
            if not file.endswith(suffixes):
                continue
            fullFileName = subRoot + os.path.sep + file
            objects.append({
                "type":"file",
                "name": fullFileName
            })
            relations.append({
                "type": "in",
                "from": fullFileName,
                "to": subRoot
            })
    return objects, relations
def getFunctions(path, fileObjects, language):
    """Mine every ``"file"`` object in ``fileObjects`` for functions.

    Args:
        path: base directory handed to ``mineFile`` for each file.
        fileObjects: iterable of dicts; only those whose ``"type"`` is
            ``"file"`` are processed, using their ``"name"``.
        language: accepted but not used by this function.

    Returns:
        ``(objects, relations)`` as accumulated by ``mineFile``; ``objects``
        always starts with the ``"global"`` function definition.
    """
    objects = [{"type": "function_definition", "name": "global"}]
    relations = []
    for entry in fileObjects:
        if entry["type"] != "file":
            continue
        # Read each file
        objects, relations = mineFile(path, entry["name"], objects, relations)
    return objects, relations
def mineFile(basePath, filename, objects, relations):
    """Scan one source file character by character.

    Args:
        basePath: directory prefix; the file opened is ``basePath`` plus
            ``filename`` with its leading ``"root"`` removed.
        filename: ``"root"``-prefixed name of the file.
        objects: list that receives the ``filename + "_main"`` function
            definition and whatever ``stateReduceCharacter`` adds.
        relations: list that receives the matching ``"in"`` relation and
            whatever ``stateReduceCharacter`` adds.

    Returns:
        ``(objects, relations)`` after the whole file has been consumed.
    """
    mainName = filename + "_main"
    sourcePath = basePath + filename[len("root"):]
    with open(sourcePath) as f:
        objects.append({
            "type": "function_definition",
            "name": mainName
        })
        relations.append({
            "type": "in",
            "from": mainName,
            "to": filename
        })
        context = []
        for c in iter(lambda: f.read(1), ""):
            context, objects, relations = stateReduceCharacter(context, objects, relations, filename, c)
        # The scanner is also fed the empty string returned at end of file.
        context, objects, relations = stateReduceCharacter(context, objects, relations, filename, "")
    return objects, relations
# TODO: reduce number of arguments of this function
# Advance the character-level scanner by one character `c`.
#
# `context` is a list of string tokens used as a stack: the branches below
# push, rewrite or slice off its tail depending on `c`.  Tokens prefixed with
# "fn_candidate:", "fn_definition:" and "fn_call:" mark an identifier that was
# followed by a parenthesised group, then "{", or then ";" respectively.
#
# Side effects visible in this function:
#   - on "{" after a fn_candidate: appends a "function_definition" object and
#     an "in" relation from the function name to `filename`;
#   - on ";" after a fn_candidate: appends a "function_call" object and a
#     "calls" relation from the caller name to "<caller>_call_<function>".
#
# Returns the tuple (context, objects, relations).  An IndexError raised
# while processing is re-raised unchanged.
#
# NOTE(review): the indentation of this copy of the file has been lost, so
# the nesting of the branches below cannot be confirmed from the text alone.
def stateReduceCharacter(context, objects, relations, filename, c):
try:
# Letters, digits, "_" and "-" extend the token on top of the stack.
if re.match("[a-zA-Z0-9_-]",c):
if len(context) == 0:
context.append("")
context[len(context)-1] += c
else:
# Opening bracket: push it (reusing an empty top token) plus a fresh token.
if c == "(":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == ")":
# print ("pre")
# print (context)
context = removeAfterHighestIndexOf("(", context)
# NOTE(review): `and` binds tighter than `or`, so the length guard below
# only protects the "if" comparison, not the "while" comparison.
if len (context) > 0 and context[-1] == "if" or context[-1] == "while":
context = context[:-1]
elif isIdentifier(context[-1]):
# This is either a function call or a function declaration, depending on
# whether we see a ; or a { next
# So flag it as a function candidate
context[-1] = "fn_candidate:" + context[-1]
# print (context)
elif c == "{":
# Drop an empty top token so the token before it can be inspected.
if len (context) > 0 and context[-1] == "" and len(context) > 1:
context = context[:-1]
# upgrade previous fn candidate to real function
# NOTE(review): context[-1] is read here without a length check.
if context[-1].startswith("fn_candidate:"):
# print (context)
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_definition:" + functionName
# print (functionName)
objects.append({
"type": "function_definition",
"name": functionName
})
relations.append({
"type": "in",
"from": functionName,
"to": filename
})
else:
# print ("NOT A GOOD FN")
# print (context)
pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "}":
context = removeAfterHighestIndexOf("{", context)
# Pop trailing reserved words and any "fn_...:" marker left on the stack.
while len(context) > 0 and (\
context[-1] in C_RESERVED_WORDS or \
re.match("fn_.+?:",context[-1])):
# while loop also removes function type specifiers
context = context[:-1]
elif c == "[":
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif c == "]":
context = removeAfterHighestIndexOf("[", context)
# Quotes: an odd count of quote tokens on the stack means this one closes.
elif c == "'":
# TODO: Make escaping strings actually useful
if context.count("'") % 2 == 1:
context = removeAfterHighestIndexOf("'", context)
else:
context.append("'")
elif c == '"':
# TODO: Make escaping strings actually useful
if context.count('"') % 2 == 1:
context = removeAfterHighestIndexOf('"', context)
else:
context.append('"')
elif c == ";":
# A fn_candidate terminated by ";" is recorded as a call.
if len (context) > 0 and context[-1].startswith("fn_candidate:"):
functionName = context[-1][len("fn_candidate:"):]
context[-1] = "fn_call:" + functionName
# Default caller when no enclosing fn_definition is on the stack.
callerName = filename+"_main"
# search up stack because we may be in an if-statement
for ctx in context:
if ctx.startswith("fn_definition:"):
callerName = ctx[len("fn_definition:"):]
break
fullCallName = callerName + "_call_" + functionName
objects.append({
"type": "function_call",
"name": fullCallName
})
relations.append({
"type": "calls",
"from": callerName,
"to": fullCallName
})
# relations.append({
# "type": "isCallOf",
# "from": fullCallName,
# "to": functionName
# })
# TODO: Temporal context; callsAfter
context = trimSemicolon(context)
elif c == "/":
# NOTE(review): presumably meant to turn two consecutive "/" into a "//"
# line-comment token; how the two if/else pairs below nest is not
# recoverable from this copy - confirm against the original file.
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
if len(context) > 0 and context[-1] == "/":
context[-1] = "//"
else:
context.append("/")
elif c in ["+","-","*", "&", "|", "!", "<" , ">", "?", "="]:
# Operators get a free pass
if len (context) > 0 and context[-1] == "":
# dirty hack - this check should be applied more broadly
context[-1] = c
else:
context.append(c)
context.append("")
elif str.isspace(c):
# TODO: clean up len(context) == 0 chekcs.
if c == "\r" or c == "\n":
# pop off any line comments and any processor directives
context = removeAfterLowestIndexOf("//", context)
# NOTE(review): no branch in this function pushes a "#" token, so the call
# below appears to have no effect - confirm.
context = removeAfterLowestIndexOf("#", context)
if len(context) == 0:
context.append("")
# Whitespace ends the current token by pushing a fresh empty one.
if context[-1]!= "":
context.append("")
except IndexError as e:
# print (context)
raise e
return context, objects, relations
def removeAfterLowestIndexOf(item, context):
    """Cut the context stack just before the first occurrence of `item`.

    When `item` occurs in `context`, the tokens before its first occurrence
    are returned as a new list (the occurrence itself is dropped).  When it
    does not occur, `context` is returned as is.  In either case an empty
    result is given a single "" token.
    """
    try:
        cut = context.index(item)
    except ValueError:
        kept = context
    else:
        kept = context[:cut]
    if not kept:
        kept.append("")
    return kept
def removeAfterHighestIndexOf(item, context):
    """Cut the context stack just before the last occurrence of `item`.

    Args:
        item: Token to search for (compared with ==).
        context: List of tokens; searched from the end towards the start.

    Returns:
        The tokens before the last occurrence of `item` (the occurrence
        itself is dropped), or `context` unchanged when `item` is absent.
        The returned list always holds at least one token: an empty result
        is replaced by [""].
    """
    # Scan down to and including index 0: an opening token at the very bottom
    # of the stack must be matched too (the previous range stopped at 1).
    for position in range(len(context) - 1, -1, -1):
        if context[position] == item:
            context = context[:position]
            break
    if not context:
        context.append("")
    return context
# Words that isIdentifier refuses to treat as identifiers and that the "}"
# handling pops off the context stack.
C_RESERVED_WORDS = [
    "auto", "_Packed", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern", "float", "for",
    "goto", "if", "int", "long", "register", "return", "short", "signed",
    "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned",
    "void", "volatile", "while",
]
def isIdentifier(identifier: str):
    """Return True when `identifier` looks like a C identifier.

    A token qualifies when it starts with a letter or underscore, continues
    with word characters only, and is not listed in C_RESERVED_WORDS.

    Compared with the earlier pattern ("^\\w[\\w\\d]+$") this accepts
    one-character names such as "f" and rejects tokens that start with a
    digit such as "123".
    """
    if re.fullmatch(r"[A-Za-z_]\w*", identifier) is None:
        return False
    return identifier not in C_RESERVED_WORDS
def trimSemicolon(context):
    """Discard the tokens of a statement that has just ended with ";".

    Searches from the top of the context stack downwards (index 0 is not
    inspected) for the nearest "{" token and keeps only the tokens that come
    before it.  If no "{" is found, nothing is kept.

    Returns a list holding at least one token: an empty result is replaced
    by [""].
    """
    # Default of 0 means "no enclosing brace seen": drop everything.
    cut = next(
        (pos for pos in range(len(context) - 1, 0, -1) if context[pos] == "{"),
        0,
    )
    kept = context[:cut]
    if not kept:
        kept.append("")
    return kept
- https://www.gituml.com/
- An attempt to create UML diagrams from code. However, it does not have nesting like plantUML has, which is a key capability I aim to exploit in this project.
## Installation and usage
### Installation
### Usage
<!-- In future this will be autogenerated from the User Guide -->
- See sections marked [USER ACTION] in `docs/1. V1.0 User Guide.md`.
- [ ] TODO on commit, fetch actions and deposit them here.
from fastapi.responses import HTMLResponse, PlainTextResponse
import os
from pathlib import Path
from watchfiles import awatch, Change
import asyncio
import re
import code2dia
import logging
class Server:
    """Serve a live-reloading SVG view of a diagram definition file.

    The FastAPI application is exposed as `self.app` and registers:
      - GET "/"         an HTML page that embeds the diagram,
      - GET "/diagram"  the SVG text (generated first if the file is missing),
      - WS  "/ws"       a socket that echoes client messages and is sent
                        "reload" whenever the definition file is modified.
    On application startup a background task running watchFile is created.
    """

    def __init__(self, DIAGRAM_DEFINITION_FILE):
        """Build the app and its routes for `DIAGRAM_DEFINITION_FILE`.

        The SVG output path is the definition path with a trailing ".py"
        replaced by ".svg".
        """
        self.DIAGRAM_DEFINITION_FILE = DIAGRAM_DEFINITION_FILE
        self.DIAGRAM_OUTPUT_FILE = re.sub(r"\.py$", ".svg", DIAGRAM_DEFINITION_FILE)
        app = FastAPI()
        self.app = app
        # Most recently connected websocket client, or None.
        self.websocket = None
        # Handle of the background watcher task; kept so the task is not
        # garbage-collected while it is running.
        self._watchTask = None
        # TODO: The viewer should be able to zoom in and out.
        # TODO: Move this html string to an actual file
        # NOTE(review): the script URL below contains "[email protected]", which
        # looks like an e-mail-obfuscation artifact standing in for a
        # package@version specifier - confirm the intended svg-pan-zoom URL.
        # The websocket URL is derived from the page location so the viewer
        # also works when the server is not reachable as localhost:8000.
        html = f"""
<!DOCTYPE html>
<html>
<head>
<title>{DIAGRAM_DEFINITION_FILE}</title>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/svg-pan-zoom.min.js"></script>
</head>
<body style="margin:0">
<div id="container" style="width: 90vw; height: 90vh; border:1px solid black; ">
</div>
<script>
var ws = new WebSocket((location.protocol === "https:" ? "wss://" : "ws://") + location.host + "/ws");
ws.onmessage = function(event) {{
if (event.data=="reload"){{
reload();
}}
}};
const reload = async ()=>{{
const diagramResponse = await fetch("/diagram");
const diagramText = await diagramResponse.text()
const containerEl = document.querySelector("#container");
containerEl.innerHTML = diagramText;
const svgElement = containerEl.children[0];
svgElement.style.cssText="display: inline; width: inherit; min-width: inherit; max-width: inherit; height: inherit; min-height: inherit; max-height: inherit;";
svgPanZoom(svgElement, {{
zoomEnabled: true,
controlIconsEnabled: true,
fit: true,
center: true
}});
}}
reload();
</script>
</body>
</html>
"""
        self.queue = asyncio.Queue()

        @app.on_event("startup")
        async def startup_event():
            self._watchTask = asyncio.create_task(self.watchFile())

        @app.get("/")
        async def get():
            return HTMLResponse(html)

        @app.get("/diagram")
        async def get_diagram():
            if not os.path.exists(self.DIAGRAM_OUTPUT_FILE):
                await self.generateSVG()
            with open(self.DIAGRAM_OUTPUT_FILE) as f:
                diagramText = f.read()
            return PlainTextResponse(diagramText)

        @app.websocket("/ws")
        async def websocket_endpoint(websocket: WebSocket):
            await websocket.accept()
            self.websocket = websocket
            try:
                # Echo loop; it ends when receive_text raises on disconnect.
                while True:
                    data = await websocket.receive_text()
                    await websocket.send_text(data)
            finally:
                # Forget this client so watchFile does not write to a closed
                # socket (unless a newer client has already replaced it).
                if self.websocket is websocket:
                    self.websocket = None

    async def generateSVG(self):
        """Run the definition file, then render its .pu output to the SVG file."""
        # TODO: Remove any file IO from SVG generation
        # A .pu and a .svg file are written to disk by code2dia/generator.py,
        # as part of the generation process.
        # Or we can at least make this optional.
        # Reload the file
        DIAGRAM_PLANTUML_FILE = re.sub(r"\.py$", ".pu", self.DIAGRAM_DEFINITION_FILE)
        # Pass the path as an argument instead of splicing it into a shell
        # command line, so spaces and shell metacharacters are harmless.
        proc = await asyncio.create_subprocess_exec(
            "python", self.DIAGRAM_DEFINITION_FILE
        )
        await proc.communicate()
        content = await code2dia.generator.generateSVG(DIAGRAM_PLANTUML_FILE)
        with open(self.DIAGRAM_OUTPUT_FILE, "wb") as f:
            f.write(content)

    async def watchFile(self):
        """Regenerate the SVG and notify the client on every modification."""
        async for changes in awatch(self.DIAGRAM_DEFINITION_FILE):
            for change in changes:
                if change[0] != Change.modified:
                    continue
                await self.generateSVG()
                websocket = self.websocket
                if websocket is None:
                    continue
                try:
                    await websocket.send_text("reload")
                except Exception:
                    # A vanished client must not stop the watcher.
                    logging.getLogger(__name__).warning(
                        "Could not notify websocket client; dropping it",
                        exc_info=True,
                    )
                    if self.websocket is websocket:
                        self.websocket = None
from fastapi.responses import HTMLResponse, PlainTextResponse
import os
from pathlib import Path
from watchfiles import awatch, Change
import asyncio
import re
import code2dia
import logging
class Server:
    """Serve a live-reloading SVG view of a diagram definition file.

    The FastAPI application is exposed as `self.app` and registers:
      - GET "/"         an HTML page that embeds the diagram,
      - GET "/diagram"  the SVG text (generated first if the file is missing),
      - WS  "/ws"       a socket that echoes client messages and is sent
                        "reload" whenever the definition file is modified.
    On application startup a background task running watchFile is created.
    """

    def __init__(self, DIAGRAM_DEFINITION_FILE):
        """Build the app and its routes for `DIAGRAM_DEFINITION_FILE`.

        The SVG output path is the definition path with a trailing ".py"
        replaced by ".svg".
        """
        self.DIAGRAM_DEFINITION_FILE = DIAGRAM_DEFINITION_FILE
        self.DIAGRAM_OUTPUT_FILE = re.sub(r"\.py$", ".svg", DIAGRAM_DEFINITION_FILE)
        app = FastAPI()
        self.app = app
        # Most recently connected websocket client, or None.
        self.websocket = None
        # Handle of the background watcher task; kept so the task is not
        # garbage-collected while it is running.
        self._watchTask = None
        # TODO: The viewer should be able to zoom in and out.
        # TODO: Move this html string to an actual file
        # NOTE(review): the script URL below contains "[email protected]", which
        # looks like an e-mail-obfuscation artifact standing in for a
        # package@version specifier - confirm the intended svg-pan-zoom URL.
        # The websocket URL is derived from the page location so the viewer
        # also works when the server is not reachable as localhost:8000.
        html = f"""
<!DOCTYPE html>
<html>
<head>
<title>{DIAGRAM_DEFINITION_FILE}</title>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/svg-pan-zoom.min.js"></script>
</head>
<body style="margin:0">
<div id="container" style="width: 90vw; height: 90vh; border:1px solid black; ">
</div>
<script>
var ws = new WebSocket((location.protocol === "https:" ? "wss://" : "ws://") + location.host + "/ws");
ws.onmessage = function(event) {{
if (event.data=="reload"){{
reload();
}}
}};
const reload = async ()=>{{
const diagramResponse = await fetch("/diagram");
const diagramText = await diagramResponse.text()
const containerEl = document.querySelector("#container");
containerEl.innerHTML = diagramText;
const svgElement = containerEl.children[0];
svgElement.style.cssText="display: inline; width: inherit; min-width: inherit; max-width: inherit; height: inherit; min-height: inherit; max-height: inherit;";
svgPanZoom(svgElement, {{
zoomEnabled: true,
controlIconsEnabled: true,
fit: true,
center: true
}});
}}
reload();
</script>
</body>
</html>
"""
        self.queue = asyncio.Queue()

        @app.on_event("startup")
        async def startup_event():
            self._watchTask = asyncio.create_task(self.watchFile())

        @app.get("/")
        async def get():
            return HTMLResponse(html)

        @app.get("/diagram")
        async def get_diagram():
            if not os.path.exists(self.DIAGRAM_OUTPUT_FILE):
                await self.generateSVG()
            with open(self.DIAGRAM_OUTPUT_FILE) as f:
                diagramText = f.read()
            return PlainTextResponse(diagramText)

        @app.websocket("/ws")
        async def websocket_endpoint(websocket: WebSocket):
            await websocket.accept()
            self.websocket = websocket
            try:
                # Echo loop; it ends when receive_text raises on disconnect.
                while True:
                    data = await websocket.receive_text()
                    await websocket.send_text(data)
            finally:
                # Forget this client so watchFile does not write to a closed
                # socket (unless a newer client has already replaced it).
                if self.websocket is websocket:
                    self.websocket = None

    async def generateSVG(self):
        """Run the definition file, then render its .pu output to the SVG file."""
        # TODO: Remove any file IO from SVG generation
        # A .pu and a .svg file are written to disk by code2dia/generator.py,
        # as part of the generation process.
        # Or we can at least make this optional.
        # Reload the file
        DIAGRAM_PLANTUML_FILE = re.sub(r"\.py$", ".pu", self.DIAGRAM_DEFINITION_FILE)
        # Pass the path as an argument instead of splicing it into a shell
        # command line, so spaces and shell metacharacters are harmless.
        proc = await asyncio.create_subprocess_exec(
            "python", self.DIAGRAM_DEFINITION_FILE
        )
        await proc.communicate()
        content = await code2dia.generator.generateSVG(DIAGRAM_PLANTUML_FILE)
        with open(self.DIAGRAM_OUTPUT_FILE, "wb") as f:
            f.write(content)

    async def watchFile(self):
        """Regenerate the SVG and notify the client on every modification."""
        async for changes in awatch(self.DIAGRAM_DEFINITION_FILE):
            for change in changes:
                if change[0] != Change.modified:
                    continue
                await self.generateSVG()
                websocket = self.websocket
                if websocket is None:
                    continue
                try:
                    await websocket.send_text("reload")
                except Exception:
                    # A vanished client must not stop the watcher.
                    logging.getLogger(__name__).warning(
                        "Could not notify websocket client; dropping it",
                        exc_info=True,
                    )
                    if self.websocket is websocket:
                        self.websocket = None
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Something interesting about visualization: using data as art.
Something interesting about games: making everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.