Source code for llvm_ir_dataset_utils.builders.julia_builder
"""Module for building and extracting bitcode from Julia applications"""
import subprocess
import os
import pathlib
import json
import logging
import shutil
import glob
from mlgo.corpus import make_corpus_lib
"""
Generates the command to compile a bitcode archive from a Julia package.
The archive then needs to be unpacked with `ar -x`.
"""
[docs]def generate_build_command(package_to_build, thread_count):
command_vector = [
"julia",
"--threads",
f"{thread_count}",
"--quiet",
]
# Close out the Julia command line switches
command_vector.append("--")
julia_builder_jl_path = os.path.join(
os.path.dirname(__file__), 'julia_builder.jl')
command_vector.append(julia_builder_jl_path)
# Add the package to build
command_vector.append(package_to_build)
return command_vector
[docs]def perform_build(package_name, build_dir, corpus_dir, thread_count):
build_command_vector = generate_build_command(package_name, thread_count)
build_log_name = f'./{package_name}.build.log'
build_log_path = os.path.join(corpus_dir, build_log_name)
environment = os.environ.copy()
julia_depot_path = os.path.join(build_dir, 'julia_depot')
environment['JULIA_DEPOT_PATH'] = julia_depot_path
environment['JULIA_PKG_SERVER'] = ''
julia_bc_path = os.path.join(build_dir, 'unopt_bc')
os.mkdir(julia_bc_path)
environment['JULIA_PKG_UNOPT_BITCODE_DIR'] = julia_bc_path
environment['JULIA_IMAGE_THREADS'] = '1'
environment['JULIA_CPU_TARGET'] = 'x86-64'
try:
with open(build_log_path, 'w') as build_log_file:
subprocess.run(
build_command_vector,
cwd=build_dir,
stdout=build_log_file,
stderr=build_log_file,
env=environment,
check=True)
except subprocess.SubprocessError:
logging.warn(f'Failed to build julia package {package_name}')
build_success = False
else:
build_success = True
if build_success:
extract_ir(build_dir, corpus_dir)
return {
'targets': [{
'success': build_success,
'build_log': build_log_name,
'name': package_name
}]
}
[docs]def unpack_archives(unopt_bc_archive_dir, unopt_bc_dir):
archive_files = os.listdir(unopt_bc_archive_dir)
for archive_file in archive_files:
full_archive_file_path = os.path.join(unopt_bc_archive_dir, archive_file)
# Strip the last two characters which will be the .a in the extensions
archive_package_name = archive_file[:-2]
archive_extraction_command_vector = ['llvm-ar', '-x', archive_file]
subprocess.run(
archive_extraction_command_vector,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
cwd=unopt_bc_archive_dir)
# Copy text_opt#0.bc to the output directory
unopt_bitcode_full_path = os.path.join(unopt_bc_archive_dir,
'text_unopt#0.bc')
copied_bitcode_full_path = os.path.join(unopt_bc_dir,
f'{archive_package_name}.bc')
shutil.copyfile(unopt_bitcode_full_path, copied_bitcode_full_path)
# Delete all bitcode files from the current extraction in preparation
# for the next archive.
for bitcode_file in glob.glob(os.path.join(unopt_bc_archive_dir, '*.bc')):
os.remove(bitcode_file)
os.remove(full_archive_file_path)
[docs]def extract_ir(build_dir, corpus_dir):
unopt_bc_dir = os.path.join(build_dir, 'unopt_bc')
output_bc_dir = os.path.join(build_dir, 'output_bc')
os.mkdir(output_bc_dir)
unpack_archives(unopt_bc_dir, output_bc_dir)
relative_paths = make_corpus_lib.load_bitcode_from_directory(output_bc_dir)
make_corpus_lib.copy_bitcode(relative_paths, output_bc_dir, corpus_dir)
make_corpus_lib.write_corpus_manifest(relative_paths, corpus_dir, '')