Skip to content

Commit

Permalink
Merge pull request #604 from sbenthall/i602
Browse files Browse the repository at this point in the history
datatracker analysis module; script for getting working group leadership
  • Loading branch information
sbenthall authored Oct 10, 2023
2 parents 20b940d + b4d9ff2 commit 0bff60f
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 2 deletions.
87 changes: 87 additions & 0 deletions bigbang/analysis/datatracker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Scripts for processing data from the IETF DataTracker
"""

from ietfdata.datatracker import *
from ietfdata.datatracker_ext import *
from dateutil.parser import *

import pandas as pd
import re

# NOTE(review): `dt` is rebound further down in this module by
# `dt = DataTracker(use_cache=True)`, so the functions defined below do not
# see this DataTrackerExt instance — confirm whether this assignment is
# still needed.
dt = DataTrackerExt()

# Pattern for DataTracker email resource URIs; group 1 captures the address.
# Raw string so that ``\.`` is a regex escape rather than an (invalid) string
# escape. The TLD class is letters only: the previous ``[A-Z|a-z]`` also
# accepted a literal ``|``.
em_re = r"/api/v1/person/email/([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,7})/"


def email_from_uri(email_uri):
    """
    Extract the email address from a DataTracker email resource URI.

    Parameters
    ----------
    email_uri : str
        A URI of the form ``/api/v1/person/email/<address>/``.

    Returns
    -------
    str or None
        The address captured from the URI, or None when the URI does not
        match ``em_re`` from its start.
    """
    m = re.match(em_re, email_uri)

    return m.group(1) if m else None


# Module-level DataTracker client used by the functions below. This rebinds
# `dt`, replacing the DataTrackerExt instance assigned earlier in the module.
# NOTE(review): `use_cache=True` presumably turns on ietfdata's response
# cache — confirm against the installed ietfdata version.
dt = DataTracker(use_cache=True)


def get_group_histories(wg_name):
    """
    For a working group name, get the group history objects
    associated with that working group.

    Parameters
    ----------
    wg_name : str
        Working group acronym, passed to ``dt.group_from_acronym``.

    Returns
    -------
    group_histories : list
        The group history objects yielded by ``dt.group_histories`` for
        the group.
    group_role_histories : list
        One entry per group history, in the same order: the result of
        ``dt.group_role_histories`` for that history, restricted to the
        chair role.
    """
    wg = dt.group_from_acronym(wg_name)
    # NOTE(review): for an unknown acronym `wg` may be None — confirm how
    # dt.group_histories treats group=None.

    # Materialize before iterating: if dt.group_histories yields a one-shot
    # iterator, looping over it below would hand the caller an exhausted one.
    group_histories = list(dt.group_histories(group=wg))

    # The chair role does not depend on the history record; look it up once.
    chair_role = dt.role_name(RoleNameURI("/api/v1/name/rolename/chair/"))

    group_role_histories = [
        dt.group_role_histories(group=grp_hist, name=chair_role)
        for grp_hist in group_histories
    ]

    return group_histories, group_role_histories


def leadership_ranges(group_acronym):
    """
    For a working group acronym,
    get the data about the changes to the Chair role
    in that working group history.

    Parameters
    ----------
    group_acronym : str
        Working group acronym, passed to ``dt.group_from_acronym``.

    Returns
    -------
    ghcr_df : pandas.DataFrame
        One row per (group history, chair role history) pair with columns
        ``datetime_max``, ``datetime_min`` (both the group history's
        ``time``), ``email``, ``person_uri``, ``name`` and ``biography``.
    agged : pandas.DataFrame
        ``ghcr_df`` grouped by name, person_uri, email and biography, with
        the minimum ``datetime_min`` and maximum ``datetime_max`` per group.
        Values equal to the overall earliest ``datetime_min`` / latest
        ``datetime_max`` in ``ghcr_df`` are replaced with None (presumably
        marking ranges that are open-ended at the edge of the recorded
        history — confirm with callers).
    """
    wg = dt.group_from_acronym(group_acronym)
    # NOTE(review): for an unknown acronym `wg` may be None — confirm how
    # dt.group_histories treats group=None.
    gh = list(dt.group_histories(group=wg))

    # The chair role does not depend on the history record; look it up once.
    chair_role = dt.role_name(RoleNameURI("/api/v1/name/rolename/chair/"))

    columns = [
        "datetime_max",
        "datetime_min",
        "email",
        "person_uri",
        "name",
        "biography",
    ]

    gh_chair_records = []
    for h in gh:
        for r in dt.group_role_histories(group=h, name=chair_role):
            # Resolve the person once per record instead of once per field.
            person = dt.person(r.person)
            gh_chair_records.append(
                {
                    "datetime_max": h.time,
                    "datetime_min": h.time,
                    "email": email_from_uri(r.email.uri),
                    "person_uri": r.person.uri,
                    "name": person.name,
                    "biography": person.biography,
                }
            )

    # Explicit columns keep the frame well-formed (and groupby-able) even
    # when no chair records were found.
    ghcr_df = pd.DataFrame.from_records(gh_chair_records, columns=columns)

    # NOTE(review): groupby drops rows whose key is missing by default, so a
    # record with email None (URI not matching em_re) is absent from `agged`.
    agged = ghcr_df.groupby(["name", "person_uri", "email", "biography"]).agg(
        {"datetime_min": "min", "datetime_max": "max"}
    )

    if not ghcr_df.empty:
        # Assign the result back rather than using inplace=True on a column
        # selection, which does not update `agged` under Copy-on-Write.
        agged["datetime_min"] = agged["datetime_min"].replace(
            {ghcr_df["datetime_min"].min(): None}
        )
        agged["datetime_max"] = agged["datetime_max"].replace(
            {ghcr_df["datetime_max"].max(): None}
        )

    return ghcr_df, agged
1 change: 0 additions & 1 deletion bigbang/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def get_common_head(str1, str2, delimiter=None):
else:
# this is ugly control flow clean it
if delimiter is not None:

dstr1 = str1.split(delimiter)
dstr2 = str2.split(delimiter)

Expand Down
2 changes: 1 addition & 1 deletion examples/datatracker/Working Group Affiliations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.8.13"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
Expand Down

0 comments on commit 0bff60f

Please sign in to comment.