Skip to content

API: collect societal signals

collect

collect(repo_path='.', files=None)

Collect ownership & social signal metrics for conflicted files.

By default operates on the set of currently conflicted files (from the in-progress merge). An explicit files iterable can be supplied to target arbitrary paths.

Signals include recency (age in days), author commit counts since merge bases, integrator prior activity, and an aggregated blame table.

Parameters:

Name Type Description Default
repo_path str

Filesystem path to the repository (defaults to current directory).

'.'
files Optional[Iterable[str]]

Optional iterable of repo-relative file paths; if omitted, only conflicted files are used.

None

Returns:

Type Description
dict[str, SocialSignalsRecord]

Mapping of file path to its `SocialSignalsRecord`.

Source code in conflict_collection/collectors/societal/collector.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def collect(
    repo_path: str = ".",
    files: Optional[Iterable[str]] = None,
) -> dict[str, SocialSignalsRecord]:
    """Collect ownership & social signal metrics for conflicted files.

    By default operates on the set of currently conflicted files (from the
    in-progress merge). An explicit ``files`` iterable can be supplied to
    target arbitrary paths; an explicitly *empty* iterable yields an empty
    result rather than falling back to the conflicted set.

    Signals include recency (age in days), author commit counts since merge
    bases, integrator prior activity, and an aggregated blame table (sorted
    by line count, largest first).

    Files for which no last commit can be found on either side (``HEAD`` or
    ``MERGE_HEAD``) are logged at error level and left out of the result.

    Args:
        repo_path: Filesystem path to the repository (defaults to current directory).
        files: Optional iterable of repo-relative file paths; if omitted
            (``None``), only conflicted files are used.

    Returns:
        Mapping of file path to ``SocialSignalsRecord``.
    """
    repo = Repo(repo_path)

    # Test against None rather than truthiness: an explicitly empty selection
    # must stay empty instead of silently expanding to every conflicted file.
    file_list = list(files) if files is not None else conflicted_files(repo)
    if not file_list:
        return {}

    head_sha = rev_parse(repo, "HEAD")
    # NOTE(review): MERGE_HEAD is resolved even when ``files`` is given
    # explicitly, so this presumably requires an in-progress merge -- confirm
    # how rev_parse behaves when MERGE_HEAD does not exist.
    merge_sha = rev_parse(repo, "MERGE_HEAD")
    base_shas = merge_bases(repo, head_sha, merge_sha)

    # Ages are measured against the newer of the two tip commits.
    ref_ts = max(commit_epoch(repo, head_sha), commit_epoch(repo, merge_sha))

    integrator = integrator_name(repo)

    # Mapping from file path to SocialSignalsRecord.
    results: dict[str, SocialSignalsRecord] = {}

    for path in file_list:
        ours_last = last_commit_for_path(repo, head_sha, path)
        theirs_last = last_commit_for_path(repo, merge_sha, path)
        if ours_last is None:
            logging.error(
                "Last commit for %s not found on %s "
                "starting from commit hash %s. "
                "Skipping file.",
                path,
                repo,
                head_sha,
            )
            continue
        if theirs_last is None:
            logging.error(
                "Last commit for %s not found on %s "
                "starting from commit hash %s. "
                "Skipping file.",
                path,
                repo,
                merge_sha,
            )
            continue

        ours_author = commit_author_str(ours_last)
        theirs_author = commit_author_str(theirs_last)

        # Per-side activity of each side's last author, counted from the
        # merge bases up to that side's tip.
        owner_commits_ours = count_commits_by_author_since_bases(
            repo, path, ours_author, base_shas, head_sha
        )
        owner_commits_theirs = count_commits_by_author_since_bases(
            repo, path, theirs_author, base_shas, merge_sha
        )

        age_days_ours = age_days(ref_ts, ours_last)
        age_days_theirs = age_days(ref_ts, theirs_last)

        # Without a known integrator there is nothing to count.
        integrator_prev = (
            count_commits_by_author(repo, path, integrator) if integrator else 0
        )

        # Blame is taken at HEAD only; entries are ordered by descending
        # line count (ties keep the order blame_aggregate returned them in).
        blame_table = sorted(
            (
                BlameEntry(author=author, lines=line_count)
                for author, line_count in blame_aggregate(repo, head_sha, path)
            ),
            key=lambda entry: entry.lines,
            reverse=True,
        )

        results[path] = SocialSignalsRecord(
            file=path,
            ours_author=ours_author,
            theirs_author=theirs_author,
            owner_commits_ours=owner_commits_ours,
            owner_commits_theirs=owner_commits_theirs,
            age_days_ours=age_days_ours,
            age_days_theirs=age_days_theirs,
            integrator_priors=IntegratorPriors(resolver_prev_commits=integrator_prev),
            blame_table=blame_table,
        )

    return results