Source code for llvm_ir_dataset_utils.tools.extract_build_failure_logs

"""Tool to get build failure logs and copy them into a folder."""

import os
import shutil

from absl import app
from absl import flags

from llvm_ir_dataset_utils.util import dataset_corpus

FLAGS = flags.FLAGS

flags.DEFINE_string('corpus_dir', None, 'The corpus directory.')
flags.DEFINE_string(
    'build_failures', None,
    'The list of build failures from get_build_failure_logs.py')
flags.DEFINE_string('output_dir', None, 'The path to the output directory.')

# Every flag defaults to None, so require the ones the tool cannot run
# without. output_dir is joined into the destination path of every copied
# log; leaving it unset would otherwise only surface as a TypeError from
# os.path.join once the first log is processed.
flags.mark_flag_as_required('corpus_dir')
flags.mark_flag_as_required('build_failures')
flags.mark_flag_as_required('output_dir')


def process_build_log(build_log_path):
  """Copies a single build log into FLAGS.output_dir as `<corpus_name>.log`.

  Args:
    build_log_path: Either the path to a build log file on disk, or a string
      of the form `<archive path>:<path inside archive>`. In the second form
      both halves are handed to dataset_corpus.load_file_from_corpus to read
      the log out of the archive.
  """
  if ':' in build_log_path:
    # We have a tar archive, extract the file and write it to the output
    # directory. Only the first colon separates the archive from the member
    # path, so a member path containing a colon is passed through intact.
    archive_path, member_path = build_log_path.split(':', 1)
    build_log = dataset_corpus.load_file_from_corpus(archive_path, member_path)
    # Name the log after the archive file with its extension removed. Using
    # splitext avoids assuming the extension is exactly four characters long.
    corpus_name = os.path.splitext(os.path.basename(archive_path))[0]
    output_file_path = os.path.join(FLAGS.output_dir, f'{corpus_name}.log')
    print(output_file_path)
    # The archive contents are written back out unchanged, in binary mode.
    with open(output_file_path, 'wb') as output_file:
      output_file.write(build_log)
  else:
    # We have a normal file and can just copy it over. The log is named after
    # the directory that contains it.
    corpus_name = os.path.basename(os.path.dirname(build_log_path))
    output_file_path = os.path.join(FLAGS.output_dir, f'{corpus_name}.log')
    shutil.copyfile(build_log_path, output_file_path)
def main(_):
  """Copies the log of every listed build failure into the output directory.

  Reads the file named by FLAGS.build_failures one line at a time. Each line
  is split on commas and its third field is taken as the build log path; the
  log is processed unless that field is the literal string `NULL`.

  Args:
    _: Unused positional argument supplied by app.run.
  """
  # TODO(boomanaiden154): Probably turn this into a CSV reader at some point,
  # but the other scripts shouldn't create any edge cases.
  with open(FLAGS.build_failures) as build_failures_file:
    for line in build_failures_file:
      build_failure = line.rstrip()
      if not build_failure:
        # A blank line (such as a trailing newline at the end of the file)
        # has no third field and would raise IndexError below.
        continue
      build_log_path = build_failure.split(',')[2]
      if build_log_path != 'NULL':
        process_build_log(build_log_path)
# Only start the tool when this file is executed as a script; app.run is
# given main as the function to invoke.
if __name__ == '__main__':
  app.run(main)