Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
arie-matsliah committed Aug 14, 2023
1 parent 26bc109 commit 54d3557
Show file tree
Hide file tree
Showing 5 changed files with 1 addition and 87 deletions.
8 changes: 1 addition & 7 deletions codex/blueprints/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,7 @@ def stats():
whole_word = request.args.get("whole_word", 0, type=int)

logger.info(f"Generating stats {activity_suffix(filter_string, data_version)}")
(
filtered_root_id_list,
num_items,
hint,
data_stats,
data_charts,
) = stats_cached(
(filtered_root_id_list, num_items, hint, data_stats, data_charts,) = stats_cached(
filter_string=filter_string,
data_version=data_version,
case_sensitive=case_sensitive,
Expand Down
76 changes: 0 additions & 76 deletions codex/data/auto_naming.py
Original file line number Diff line number Diff line change
@@ -1,92 +1,16 @@
from collections import defaultdict

from codex.data.neurotransmitters import NEURO_TRANSMITTER_NAMES


def extract_label_parts(lbl, with_subparts):
    """Split a label string into its parts.

    Labels are ';'-delimited; when *with_subparts* is truthy, each
    ';'-delimited piece is further split on ','. Every part is returned
    whitespace-stripped, in original order.

    Args:
        lbl: raw label string, e.g. "a; b, c".
        with_subparts: whether to additionally split on ','.

    Returns:
        List of stripped part strings.
    """
    top_level = lbl.split(";")
    if with_subparts:
        return [piece.strip() for segment in top_level for piece in segment.split(",")]
    return [segment.strip() for segment in top_level]


def make_canonic(s):
    """Return the canonic form of *s*: upper-cased, with spaces and
    hyphens replaced by underscores."""
    canonic_table = str.maketrans({" ": "_", "-": "_"})
    return s.upper().translate(canonic_table)


def is_valid_token(t, canonic_coarse_annos):
    """Return True if *t* is usable as a naming token.

    A token is rejected when it contains whitespace/punctuation, matches a
    blacklist of substrings or the "put_" prefix, is purely numeric, is a
    plain lower-case or Capitalized alphabetic word, or collides
    (canonically) with a known coarse annotation.

    Args:
        t: candidate token string.
        canonic_coarse_annos: set of canonic annotation strings the token
            must not collide with (compared via make_canonic).

    Returns:
        bool: True when the token passes all checks.
    """
    # Reject tokens containing separator or punctuation characters.
    if any(c in t for c in " .,?()[]"):
        return False
    lowered = t.lower()
    # Substring blacklist: generic/derived terms that make poor names.
    if any(
        bad in lowered
        for bad in (
            "ascending",
            "descending",
            "unclassified",
            "clone",
            "test",
            "odd",
            "putative",
            "fbbt",
            "eye_",
            "murthy",
            "seung",
        )
    ):
        return False
    # "put_" prefix marks putative/unconfirmed names.
    if lowered.startswith("put_"):
        return False
    # Purely numeric strings carry no naming information.
    if t.isnumeric():
        return False
    # Plain lower-case or Capitalized alphabetic words are ordinary words,
    # not identifiers.
    if t.isalpha() and (t == t.lower() or t == t.capitalize()):
        return False
    # Avoid collision with coarse annotations (classes, groups, NT names).
    if make_canonic(t) in canonic_coarse_annos:
        return False
    return True


def assign_names_from_annotations(neuron_data):
token_to_cell_counts = defaultdict(int)
cell_to_potential_names = defaultdict(list)

canonic_coarse_annos = set(
[make_canonic(nd["super_class"]) for nd in neuron_data.values()]
)
canonic_coarse_annos |= set(
[make_canonic(nd["group"]) for nd in neuron_data.values()]
)
canonic_coarse_annos |= set(
[make_canonic(nt_key) for nt_key in NEURO_TRANSMITTER_NAMES.keys()]
)
canonic_coarse_annos |= set(
[make_canonic(nt_desc) for nt_desc in NEURO_TRANSMITTER_NAMES.values()]
)

for rid, nd in neuron_data.items():
token_to_cell_counts[nd["group"]] += 1
cell_to_potential_names[rid].append(nd["group"])

assert len(cell_to_potential_names) == len(neuron_data)

for t1 in list(token_to_cell_counts.keys()):
for t2 in list(token_to_cell_counts.keys()):
if t1 != t2 and make_canonic(t1) == make_canonic(t2):
print(f"{t1} --> {t2}")
assert False

assigned_name_counts = defaultdict(int)

# if there are multiple options, chose one deterministically
Expand Down
1 change: 0 additions & 1 deletion codex/data/local_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def _read_data(filename, with_timestamp=False):
connectivity_cluster_rows=connectivity_cluster_rows,
svd_rows=svd_rows,
lr_matching_rows=lr_matching_rows,
olr_prediction_rows=[],
)
# free mem
del neuron_rows
Expand Down
1 change: 0 additions & 1 deletion codex/data/neuron_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def __init__(
grouped_connection_counts,
grouped_reciprocal_connection_counts,
svd_rows,
olr_prediction_rows,
):
self.neuron_data = neuron_attributes
self.connections_ = Connections(neuron_connection_rows)
Expand Down
2 changes: 0 additions & 2 deletions codex/data/neuron_data_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ def initialize_neuron_data(
connectivity_cluster_rows,
svd_rows,
lr_matching_rows,
olr_prediction_rows,
):
neuron_attributes = {}
neuron_connection_rows = []
Expand Down Expand Up @@ -473,5 +472,4 @@ def label_row_to_dict(row):
grouped_connection_counts=grouped_connection_counts,
grouped_reciprocal_connection_counts=grouped_reciprocal_connection_counts,
svd_rows=svd_rows,
olr_prediction_rows=olr_prediction_rows,
)

0 comments on commit 54d3557

Please sign in to comment.