Source code for llvm_ir_dataset_utils.builders.julia_builder

"""Module for building and extracting bitcode from Julia applications"""

import subprocess
import os
import pathlib
import json
import logging
import shutil
import glob

from mlgo.corpus import make_corpus_lib
"""
Generates the command to compile a bitcode archive from a Julia package.
The resulting archive then needs to be unpacked with `llvm-ar -x`.
"""


def generate_build_command(package_to_build, thread_count):
    """Return the argv list that runs julia_builder.jl on a package.

    Args:
        package_to_build: Value passed as the sole argument to the
            julia_builder.jl script.
        thread_count: Value formatted into the argument that follows
            ``--threads``.

    Returns:
        A list of strings: the ``julia`` executable, its switches, a ``--``
        separator, the path of ``julia_builder.jl`` located next to this
        module, and finally ``package_to_build``.
    """
    # The helper script is resolved relative to this module's own location.
    builder_script = os.path.join(
        os.path.dirname(__file__), 'julia_builder.jl')

    return [
        'julia',
        '--threads',
        f'{thread_count}',
        '--quiet',
        # Close out the Julia command line switches; everything after this
        # is the script path followed by its argument.
        '--',
        builder_script,
        package_to_build,
    ]
def perform_build(package_name, build_dir, corpus_dir, thread_count):
    """Build a Julia package and, on success, extract its bitcode.

    The build command is run inside ``build_dir`` with stdout and stderr
    redirected to ``<package_name>.build.log`` in ``corpus_dir``.

    Args:
        package_name: Name of the package, forwarded to
            ``generate_build_command`` and used to name the build log.
        build_dir: Working directory for the build. A ``julia_depot`` path
            and a freshly created ``unopt_bc`` directory are placed under it.
        corpus_dir: Directory that receives the build log and, on success,
            the extracted corpus.
        thread_count: Forwarded to ``generate_build_command``.

    Returns:
        A dict with a single-element ``'targets'`` list describing the
        build: ``'success'`` (bool), ``'build_log'`` (log file name relative
        to ``corpus_dir``) and ``'name'`` (the package name).

    Raises:
        FileExistsError: If ``<build_dir>/unopt_bc`` already exists
            (raised by ``os.mkdir``).
    """
    build_command_vector = generate_build_command(package_name, thread_count)
    build_log_name = f'./{package_name}.build.log'
    build_log_path = os.path.join(corpus_dir, build_log_name)

    julia_bc_path = os.path.join(build_dir, 'unopt_bc')
    os.mkdir(julia_bc_path)

    # Start from the caller's environment and override the Julia settings.
    environment = os.environ.copy()
    environment['JULIA_DEPOT_PATH'] = os.path.join(build_dir, 'julia_depot')
    environment['JULIA_PKG_SERVER'] = ''
    # NOTE(review): presumably read by the Julia side to decide where the
    # unoptimized bitcode archives are written; extract_ir reads this same
    # directory afterwards.
    environment['JULIA_PKG_UNOPT_BITCODE_DIR'] = julia_bc_path
    environment['JULIA_IMAGE_THREADS'] = '1'
    environment['JULIA_CPU_TARGET'] = 'x86-64'

    try:
        with open(build_log_path, 'w') as build_log_file:
            subprocess.run(
                build_command_vector,
                cwd=build_dir,
                stdout=build_log_file,
                stderr=build_log_file,
                env=environment,
                check=True)
    except subprocess.SubprocessError:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and emits the same record.
        logging.warning(f'Failed to build julia package {package_name}')
        build_success = False
    else:
        build_success = True
        # Only a successful build has archives worth extracting.
        extract_ir(build_dir, corpus_dir)

    return {
        'targets': [{
            'success': build_success,
            'build_log': build_log_name,
            'name': package_name
        }]
    }
def unpack_archives(unopt_bc_archive_dir, unopt_bc_dir):
    """Extract one bitcode file from every archive in a directory.

    Each entry of ``unopt_bc_archive_dir`` is extracted in place with
    ``llvm-ar -x``. The member named ``text_unopt#0.bc`` is then copied to
    ``unopt_bc_dir`` under the archive's name (minus its last two
    characters) with a ``.bc`` extension. Afterwards every ``*.bc`` file in
    the archive directory and the archive itself are deleted.

    Args:
        unopt_bc_archive_dir: Directory whose entries are treated as
            archives. It is consumed: archives are removed as they are
            processed.
        unopt_bc_dir: Directory that receives the copied ``.bc`` files.

    Raises:
        subprocess.CalledProcessError: If ``llvm-ar`` exits with a non-zero
            status (``check=True``).
    """
    for archive_name in os.listdir(unopt_bc_archive_dir):
        archive_path = os.path.join(unopt_bc_archive_dir, archive_name)
        # Drop the trailing two characters, expected to be the ".a" suffix.
        package_stem = archive_name[:-2]

        # Extraction runs inside the archive directory, so the bare file
        # name is enough and the members land next to the archive.
        subprocess.run(
            ['llvm-ar', '-x', archive_name],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            cwd=unopt_bc_archive_dir)

        # Keep only text_unopt#0.bc, renamed after the package.
        shutil.copyfile(
            os.path.join(unopt_bc_archive_dir, 'text_unopt#0.bc'),
            os.path.join(unopt_bc_dir, f'{package_stem}.bc'))

        # Clear every extracted bitcode file so the next archive's members
        # do not mix with this one's, then drop the archive itself.
        extracted_pattern = os.path.join(unopt_bc_archive_dir, '*.bc')
        for extracted_file in glob.glob(extracted_pattern):
            os.remove(extracted_file)
        os.remove(archive_path)
def extract_ir(build_dir, corpus_dir):
    """Unpack the built archives and copy their bitcode into the corpus.

    Archives are read from ``<build_dir>/unopt_bc`` and unpacked into a
    newly created ``<build_dir>/output_bc`` directory; the resulting files
    are then handed to ``make_corpus_lib`` to populate ``corpus_dir``.

    Args:
        build_dir: Build directory containing the ``unopt_bc`` directory.
        corpus_dir: Destination passed to ``make_corpus_lib.copy_bitcode``
            and ``make_corpus_lib.write_corpus_manifest``.

    Raises:
        FileExistsError: If ``<build_dir>/output_bc`` already exists
            (raised by ``os.mkdir``).
    """
    archive_dir = os.path.join(build_dir, 'unopt_bc')
    staging_dir = os.path.join(build_dir, 'output_bc')
    os.mkdir(staging_dir)

    unpack_archives(archive_dir, staging_dir)

    # NOTE(review): judging by the helper names, this lists the bitcode in
    # the staging directory, copies it to the corpus and writes a manifest;
    # confirm against mlgo.corpus.make_corpus_lib.
    bitcode_paths = make_corpus_lib.load_bitcode_from_directory(staging_dir)
    make_corpus_lib.copy_bitcode(bitcode_paths, staging_dir, corpus_dir)
    make_corpus_lib.write_corpus_manifest(bitcode_paths, corpus_dir, '')