Source code for llvm_ir_dataset_utils.builders.manual_builder

"""Module for building and extracting bitcode from applications using an
arbitrary build system by manually running specified commands."""

import subprocess
import os

from mlgo.corpus import extract_ir_lib
from mlgo.corpus import make_corpus_lib

BUILD_LOG_NAME = './build.log'


def perform_build(commands_list, build_dir, threads, corpus_dir,
                  environment_variables):
    """Run the given build commands through the shell and log their output.

    Every command is executed even if an earlier one fails; the build as a
    whole is reported successful only when all commands exit with status 0.

    Args:
        commands_list: Iterable of shell command strings, run in order.
        build_dir: Working directory in which each command is executed.
        threads: Value exported to the commands as the ``JOBS`` environment
            variable (converted with ``str``).
        corpus_dir: Directory that receives the build log. Its basename is
            used as the name of the reported target.
        environment_variables: Mapping of extra environment variables. These
            are applied last, so they override both the inherited
            environment and ``JOBS``.

    Returns:
        A dict with a single-element ``'targets'`` list describing the build:
        overall success flag, the build log name, and the target name.
    """
    # The environment is the same for every command, so build it once.
    environment = os.environ.copy()
    environment['JOBS'] = str(threads)
    environment.update(environment_variables)

    build_log_path = os.path.join(corpus_dir, BUILD_LOG_NAME)
    overall_success = True
    # Open the log a single time so that the output of every command is
    # retained. Reopening it in 'w' mode per command would truncate the file
    # and leave only the last command's output.
    with open(build_log_path, 'w') as build_log_file:
        for command in commands_list:
            build_process = subprocess.run(
                command,
                cwd=build_dir,
                env=environment,
                shell=True,
                stderr=build_log_file,
                stdout=build_log_file)
            if build_process.returncode != 0:
                overall_success = False

    return {
        'targets': [{
            'success': overall_success,
            'build_log': BUILD_LOG_NAME,
            'name': os.path.basename(corpus_dir)
        }]
    }
def extract_ir(build_dir, corpus_dir, threads):
    """Extract IR from the build output and write a corpus manifest.

    Loads objects from ``build_dir`` with ``extract_ir_lib``, runs the
    extraction using ``llvm-objcopy`` with the ``.llvmcmd`` and ``.llvmbc``
    names, and writes the manifest for the resulting paths to ``corpus_dir``.

    Args:
        build_dir: Directory handed to ``load_from_directory`` as the source.
        corpus_dir: Directory handed to the loader and the manifest writer.
        threads: Forwarded to ``run_extraction`` (presumably the degree of
            parallelism -- confirm against ``extract_ir_lib``).
    """
    loaded_objects = extract_ir_lib.load_from_directory(build_dir, corpus_dir)
    extracted_paths = extract_ir_lib.run_extraction(
        loaded_objects,
        threads,
        "llvm-objcopy",
        None,
        None,
        ".llvmcmd",
        ".llvmbc",
    )
    extract_ir_lib.write_corpus_manifest(None, extracted_paths, corpus_dir)
def extract_raw_ir(build_dir, corpus_dir, threads):
    """Copy bitcode found in the build directory into the corpus.

    Uses ``make_corpus_lib`` to load the bitcode paths from ``build_dir``,
    copy them into ``corpus_dir``, and write a corpus manifest there with an
    empty string as the final argument.

    Args:
        build_dir: Directory that is searched for bitcode.
        corpus_dir: Destination directory for the bitcode and the manifest.
        threads: Unused; kept so the signature parallels ``extract_ir``.
    """
    bitcode_paths = make_corpus_lib.load_bitcode_from_directory(build_dir)
    make_corpus_lib.copy_bitcode(bitcode_paths, build_dir, corpus_dir)
    make_corpus_lib.write_corpus_manifest(bitcode_paths, corpus_dir, '')