Source code for llvm_ir_dataset_utils.tools.spack_analyze_failures

"""A tool for finding spack build failures that break the most dependent
packages.
"""

import json
import csv
import os

from absl import app
from absl import flags

FLAGS = flags.FLAGS

# Command line inputs. Both are files that main() opens and parses.
flags.DEFINE_string(
    'build_failures', None,
    'The path to the CSV file of build failures from get_build_failures.py')
flags.DEFINE_string(
    'package_list', None,
    'The path to the package list JSON from get_spack_package_list.py')

# Neither flag has a usable default (both default to None), so require them up
# front rather than failing later when main() tries to open None.
flags.mark_flag_as_required('build_failures')
flags.mark_flag_as_required('package_list')


def get_dependents_dict(package_dependencies_dict):
  """Inverts a package -> dependencies mapping.

  Args:
    package_dependencies_dict: A mapping from a package key to a mapping that
      has a 'deps' entry listing the keys of the packages it depends on.

  Returns:
    A dictionary mapping each dependency key to the list of package keys that
    list it under 'deps', in the iteration order of the input mapping.
  """
  reverse_edges = {}
  for dependent, package_info in package_dependencies_dict.items():
    for dependency in package_info['deps']:
      # Create the list the first time a dependency is seen, then record the
      # package that depends on it.
      reverse_edges.setdefault(dependency, []).append(dependent)
  return reverse_edges
def get_dependents(package_hash, dependents_dict):
  """Collects every package that directly or transitively depends on one.

  The walk is iterative and tracks which packages have already been reported
  and expanded. This means shared subtrees (diamond shaped dependency graphs)
  are only walked once, long dependency chains do not hit the interpreter
  recursion limit, and a cycle in the graph cannot cause endless recursion.

  Args:
    package_hash: The key of the package whose dependents are wanted.
    dependents_dict: A mapping from a package key to the list of keys of its
      direct dependents, as built by get_dependents_dict.

  Returns:
    A list containing each dependent exactly once. The order is the order of
    first appearance in a walk that lists a package's direct dependents and
    then descends into each of them in turn. The starting package itself is
    never included, even if the graph contains a cycle through it.
  """
  if package_hash not in dependents_dict:
    return []

  dependents = []
  # Packages that have already been reported (the root is seeded so that it is
  # never reported as its own dependent).
  seen = {package_hash}
  # Packages whose direct dependents have already been listed.
  expanded = set()
  # Each entry iterates over the direct dependents of a package whose subtree
  # is still being walked; this replaces the call stack of a recursive walk.
  pending = []

  next_hash = package_hash
  while next_hash is not None:
    expanded.add(next_hash)
    direct_dependents = dependents_dict[next_hash]
    for dependent_hash in direct_dependents:
      if dependent_hash not in seen:
        seen.add(dependent_hash)
        dependents.append(dependent_hash)
    pending.append(iter(direct_dependents))

    # Find the next package to descend into, unwinding finished levels.
    next_hash = None
    while pending and next_hash is None:
      for candidate_hash in pending[-1]:
        # Packages without dependents have nothing to add, and packages that
        # were already expanded have had their whole subtree reported.
        if candidate_hash in dependents_dict and candidate_hash not in expanded:
          next_hash = candidate_hash
          break
      else:
        pending.pop()

  return dependents
def deduplicate_list(to_deduplicate):
  """Returns the items of an iterable with repeats removed.

  Args:
    to_deduplicate: An iterable of hashable items.

  Returns:
    A new list holding the first occurrence of every item, in the order in
    which the items were first encountered.
  """
  already_kept = set()
  unique_items = []
  for item in to_deduplicate:
    if item in already_kept:
      continue
    already_kept.add(item)
    unique_items.append(item)
  return unique_items
def _extract_package_hash(build_log_path):
  """Pulls the package hash out of a build log path from the failures CSV."""
  package_name_hash = os.path.dirname(build_log_path)
  # Cut off the last six characters to get rid of the .tar: at the
  # end of every line in an archived corpus.
  # TODO(boomanaiden154): Make this robust against usage in a non-archived
  # corpus.
  # Split on the last hyphen only: package names (and parent directories) can
  # themselves contain hyphens, so taking the second hyphen-separated piece
  # would return part of the name instead of the hash.
  # NOTE(review): assumes the hash is the final hyphen-separated component of
  # the directory name - confirm against get_build_failures.py output.
  return package_name_hash.rsplit('-', 1)[1][:-6]


def main(_):
  """Prints each directly failing package with its number of dependents.

  Reads the package list JSON and the build failures CSV named by the
  package_list and build_failures flags, then writes one `hash,count` line to
  stdout per failure, sorted by ascending dependent count.
  """
  with open(FLAGS.package_list) as package_list_file:
    package_dict = json.load(package_list_file)

  package_hash_failures = []
  # newline='' lets the csv module handle line endings itself, as the csv
  # documentation recommends for files passed to csv.reader.
  with open(FLAGS.build_failures, newline='') as build_failures_file:
    for failure_row in csv.reader(build_failures_file):
      # Exclude failures that happen because a dependency fails to build.
      if failure_row[2] != 'NULL':
        package_hash_failures.append(_extract_package_hash(failure_row[2]))

  dependents_dict = get_dependents_dict(package_dict)

  # Deduplicate the list of dependents so that a package reachable through
  # several paths in the dependents tree is only counted once.
  failures_dependents = [
      (failure_hash,
       len(deduplicate_list(get_dependents(failure_hash, dependents_dict))))
      for failure_hash in package_hash_failures
  ]

  failures_dependents.sort(key=lambda a: a[1])

  for failure_hash, dependent_count in failures_dependents:
    print(f'{failure_hash},{dependent_count}')
# Only start the absl application when executed as a script, not on import.
if __name__ == '__main__':
  app.run(main)