You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
118 lines
4.1 KiB
118 lines
4.1 KiB
2 years ago
|
"""
|
||
|
File: extract_code_python.py
|
||
|
Created Time: 2023-02-06
|
||
|
Author: Krahets (krahets@163.com)
|
||
|
"""
|
||
|
|
||
|
import re
|
||
|
import os
|
||
|
import os.path as osp
|
||
|
import glob
|
||
|
|
||
|
class ExtractCodeBlocksPython:
    """Index the classes, methods and top-level functions of a Python file.

    After construction:

    * ``self.lines`` holds the file's lines and ``self.content`` the full text.
    * ``self.classes`` maps each top-level class name to a dict with keys
      ``"indentation"``, ``"line_number"`` (``"start"``, ``"end"``,
      ``"header"``), ``"block"`` and ``"functions"`` (its methods).
    * ``self.functions`` maps each top-level function name to the same dict
      shape, without the ``"functions"`` key.

    Line numbers are 0-based indices into ``self.lines``; ``"end"`` is
    exclusive. Results are keyed by name, so a later definition with the same
    name replaces an earlier one.
    """

    def __init__(self, file_path) -> None:
        """Read ``file_path`` and extract its classes and functions.

        Args:
            file_path: Path of the Python source file to analyse.
        """
        self.file_path = file_path
        # Be explicit about the encoding: the platform default is not UTF-8
        # everywhere, and source files may contain non-ASCII comments.
        with open(file_path, encoding="utf-8") as f:
            self.lines = f.readlines()
            self.content = "".join(self.lines)

        # Regular expression patterns to match function names and class names
        self.func_pattern = re.compile(r'(\s*)def\s+(\w+)\s*\(')
        self.class_pattern = re.compile(r'class\s+(\w+)')

        # Detect and extract all the classes and functions
        self.classes = self.extract_class_blocks()
        self.functions = self.extract_function_blocks()

    def search_block(self, header_line, indentation):
        """Find the extent of the class/function whose header is at ``header_line``.

        The block runs from an optional one-line ``\"\"\" ... \"\"\"`` header
        comment placed above the header (at the same indentation) down to the
        last non-blank line that is indented deeper than ``indentation``.

        Args:
            header_line: 0-based index of the ``def``/``class`` line.
            indentation: Number of leading whitespace characters of the header.

        Returns:
            A tuple ``(start_line, end_line, block)`` where ``end_line`` is
            exclusive and ``block`` is ``self.lines[start_line:end_line]``.
        """
        total = len(self.lines)

        # Search the code: the body ends at the first non-blank line that is
        # not indented deeper than the header. Using string methods instead of
        # a pattern ending in "\n" keeps the last line of a file that has no
        # trailing newline inside the block.
        end_line = total
        for i in range(header_line + 1, total):
            line = self.lines[i]
            if not line.strip():
                continue
            if len(line) - len(line.lstrip()) <= indentation:
                end_line = i
                break

        # Search the header comment. Only blank lines, decorators and "#"
        # comments may sit between it and the header; any other line means
        # this block has no header comment, so the block starts at the header
        # itself rather than at some unrelated earlier comment.
        header_comment = re.compile(r'^\s{' + str(indentation) + r'}""".+')
        start_line = header_line
        for i in range(header_line - 1, -1, -1):
            line = self.lines[i]
            if header_comment.search(line) is not None:
                start_line = i
                break
            stripped = line.strip()
            if stripped and not stripped.startswith(("@", "#")):
                break

        # Remove empty lines at bottom
        while end_line > start_line and not self.lines[end_line - 1].strip():
            end_line -= 1

        return start_line, end_line, self.lines[start_line:end_line]

    def extract_function_blocks(self, indentation=0, start_line=-1, end_line=-1):
        """Extract all the functions whose ``def`` sits at ``indentation``.

        Args:
            indentation: Required number of whitespace characters before
                ``def``.
            start_line: First line index to scan; ``-1`` means the first line.
            end_line: Last line index to scan (inclusive); ``-1`` means the
                last line of the file.

        Returns:
            A dict mapping function name to a dict with keys
            ``"indentation"``, ``"line_number"`` and ``"block"``.
        """
        functions = {}

        if start_line == -1:
            start_line = 0
        if end_line == -1:
            end_line = len(self.lines) - 1
        # Never scan past the end of the file, whatever the caller passed.
        last_line = min(end_line, len(self.lines) - 1)

        for line_num in range(start_line, last_line + 1):
            # Search the function header
            func_match = self.func_pattern.match(self.lines[line_num])
            if func_match is None:
                continue
            # The function should match the input indentation
            if len(func_match.group(1)) != indentation:
                continue

            # Search the block from the header line
            block_start, block_end, func_block = self.search_block(
                line_num, indentation)
            # Construct the functions dict
            functions[func_match.group(2)] = {
                "indentation": indentation,
                "line_number": {
                    "start": block_start,
                    "end": block_end,
                    "header": line_num,
                },
                "block": func_block,
            }

        return functions

    def extract_class_blocks(self):
        """Extract all the classes declared at column 0, with their methods.

        Methods are looked up at an indentation of 4 inside each class block.

        Returns:
            A dict mapping class name to a dict with keys ``"indentation"``,
            ``"line_number"``, ``"block"`` and ``"functions"``.
        """
        classes = {}

        for line_num, line in enumerate(self.lines):
            # Search the class header
            class_match = self.class_pattern.match(line)
            if class_match is None:
                continue

            # Search the block from the header line
            block_start, block_end, class_block = self.search_block(line_num, 0)
            # Construct the classes dict
            classes[class_match.group(1)] = {
                "indentation": 0,
                "line_number": {
                    "start": block_start,
                    "end": block_end,
                    "header": line_num,
                },
                "block": class_block,
                # block_end is exclusive while extract_function_blocks takes
                # an inclusive end, hence the "- 1".
                "functions": self.extract_function_blocks(
                    indentation=4, start_line=block_start,
                    end_line=block_end - 1),
            }

        return classes
|