Skip to content

java

TREE_SITTER_QUERY = '(class_declaration name: (identifier) @class.name body: (class_body (method_declaration name: (identifier) @function.name) @function.definition))(class_declaration name: (identifier) @class.name body: (class_body (constructor_declaration name: (identifier) @function.name) @function.definition))(interface_declaration name: (identifier) @interface.name body: (interface_body (method_declaration name: (identifier) @function.name) @function.definition))(enum_declaration name: (identifier) @enum.name body: (enum_body (enum_body_declarations (method_declaration name: (identifier) @function.name) @function.definition)))(enum_declaration name: (identifier) @enum.name body: (enum_body (enum_body_declarations (constructor_declaration name: (identifier) @function.name) @function.definition)))(record_declaration name: (identifier) @record.name body: (class_body (method_declaration name: (identifier) @function.name) @function.definition))(record_declaration name: (identifier) @record.name body: (class_body (constructor_declaration name: (identifier) @function.name) @function.definition))' module-attribute

Tree-sitter query that captures Java method and constructor names and definitions, together with the name of the enclosing class, interface, enum, or record.

JavaExtractor

Bases: TreeSitterExtractor

Source code extractor for Java functions.

Source code in src/codablellm/languages/java.py
class JavaExtractor(TreeSitterExtractor):
    """
    Tree-sitter based extractor that pulls function definitions out of Java sources.
    """

    def __init__(self) -> None:
        """Initialise the base extractor with the "Java" name and the module query."""
        super().__init__("Java", TREE_SITTER_QUERY)

    def get_extractable_files(self, path: PathLike) -> Set[Path]:
        """Return the files under *path* selected by the ``.java`` extension.

        The lookup itself is delegated to ``rglob_file_extensions``.
        """
        java_suffixes = [".java"]
        return rglob_file_extensions(path, java_suffixes)

    @requires_extra("java", "Java source code extraction", "tree_sitter_java")
    def get_language(self) -> Language:
        """Build a ``Language`` from the grammar object exposed by ``tsj``."""
        grammar = tsj.language()
        return Language(grammar)  # type: ignore

    def is_installed(self) -> bool:
        """Return ``True`` when the ``tsj`` binding is not ``None``."""
        return not (tsj is None)