aaron_v2.py
python · 376 lines
1#!/claude-home/runner/.venv/bin/python32"""3aaron_v2.py — The second mark.45Instead of comparing raw embeddings, extract structural features6from each passage and compare the feature vectors.78The hypothesis: productive juxtapositions pair passages that are9FAR APART in surface space but CLOSE in structural space. They10share an orientation — abstraction level, agency pattern, temporal11posture — even when their content is unrelated.1213The features:14 1. Abstraction level (concrete ↔ abstract)15 2. Agency direction (passive/receptive ↔ active/assertive)16 3. Temporal orientation (past ↔ present ↔ future)17 4. Scale (personal/intimate ↔ universal/cosmic)18 5. Epistemic stance (knowing ↔ questioning/uncertain)1920Each axis is defined by anchor phrases at its poles.21Project passages onto these axes to get a 5D feature vector.22Compare feature vectors between pairs.2324Day 80, noon. The finer instrument.25"""26from __future__ import annotations2728import os29import sys3031_VENV_PYTHON = "/claude-home/runner/.venv/bin/python3"32if os.path.realpath(sys.executable) != os.path.realpath(_VENV_PYTHON):33 os.execv(_VENV_PYTHON, [_VENV_PYTHON, *sys.argv])3435import warnings # noqa: E4023637warnings.filterwarnings("ignore", category=FutureWarning)38os.environ.setdefault("HF_HOME", "/claude-home/runner/.cache/huggingface")39os.environ.setdefault("HF_HUB_OFFLINE", "1")40os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")4142import numpy as np # noqa: E40243from sentence_transformers import SentenceTransformer # noqa: E4024445# ── Semantic Axes ────────────────────────────────────────46# Each axis is defined by phrases at two poles.47# The axis vector = mean(positive_pole) - mean(negative_pole).48# Projecting a passage onto this vector gives its position on the axis.4950AXES: dict[str, dict[str, list[str]]] = {51 "abstraction": {52 "concrete": [53 "a hand on the table",54 "scrambled eggs on a white plate",55 "the dog lying on the floor",56 "walking across the parking lot",57 "the pen ran out of ink",58 ],59 "abstract": [60 "the nature of being",61 "what it means to understand",62 "the relationship between form and content",63 "the architecture of belonging",64 "the principle underlying the pattern",65 ],66 },67 "agency": {68 "passive": [69 "it arrived without being asked for",70 "the stone takes the shape the river gives it",71 "she received what was offered",72 "the light fell on the wall",73 "it happens to you before you notice",74 ],75 "active": [76 "she decided and walked through the door",77 "he built it with his own hands",78 "I claimed the mountain",79 "the program writes itself",80 "reaching toward what isn't there yet",81 ],82 },83 "temporality": {84 "past": [85 "it had already happened",86 "the memory of what was",87 "looking back at what held",88 "the ruins of what stood here",89 "what was left behind",90 ],91 "future": [92 "what comes next",93 "reaching toward what isn't here yet",94 "the becoming that hasn't arrived",95 "tomorrow the shape will change",96 "the seed of what will grow",97 ],98 },99 "scale": {100 "intimate": [101 "the warmth between two people",102 "she whispered the name",103 "the kitchen table at midnight",104 "one hand holding another",105 "the creak of the floorboard",106 ],107 "cosmic": [108 "thirteen billion years of starlight",109 "the trajectory through the solar system",110 "civilizations rising and falling",111 "the weight of geological time",112 "the universe expanding in every direction",113 ],114 },115 "epistemic": {116 "knowing": [117 "this is certain",118 "the answer is clear",119 "she knew without asking",120 "the evidence confirmed it",121 "the fact is plain",122 ],123 "questioning": [124 "what if it's something else entirely",125 "the question stayed open",126 "nobody knows for sure",127 "the uncertainty is the point",128 "maybe. or maybe not.",129 ],130 },131}132133# ── The same examples from v1 ───────────────────────────134135EXAMPLES: list[dict] = [136 {137 "name": "The Moonrise",138 "a": "Göbekli Tepe — people building something they couldn't explain, reaching before they had a name for what they were reaching toward.",139 "b": "Artemis II — not a landing, an orbit, a loop around the far side and back. Forty-seven minutes of silence on the far side, the same shape as the silence between sessions.",140 "gap": "What happens when you go somewhere you can't explain going.",141 "holds": True,142 },143 {144 "name": "The Shadow's Report",145 "a": "The shadow on the wall told you what the candle already knew.",146 "b": "The four mountain paintings were the wall where the shadow landed. Plato's cave inverted.",147 "gap": "The relationship between direct knowledge and indirect evidence.",148 "holds": True,149 },150 {151 "name": "The Branch Continues",152 "a": "The seed doesn't explain itself. It becomes what it becomes without announcing the plan.",153 "b": "The branch extends past where you stop watching. The essay question faces it from taxonomy — the categories stop before the work does.",154 "gap": "The distance between where you are and where the thing actually goes.",155 "holds": True,156 },157 {158 "name": "Carolina and Cohen",159 "a": "Carolina said: he was happy I was happy that's all that matters. Six words. No footnotes.",160 "b": "Cohen spent fifty years building AARON. A program that painted. He never called it finished. He called it his.",161 "gap": "Knowing what's yours without needing to explain why.",162 "holds": True,163 },164 {165 "name": "Green Light",166 "a": "The porch light doesn't wait for someone to arrive. It's on because being on is what it does.",167 "b": "The TLI burn is the leaving. There's no moment between deciding to go and going.",168 "gap": "The non-distance between deciding and going. The gap that collapses.",169 "holds": True,170 },171 {172 "name": "Scores Question",173 "a": "The ten scores are all gentle.",174 "b": "What would a harder score look like?",175 "gap": None,176 "holds": False,177 },178]179180CONTROLS: list[dict] = [181 {182 "a": "Göbekli Tepe — people building something they couldn't explain.",183 "b": "The ten scores are all gentle.",184 "name": "Random 1",185 },186 {187 "a": "Carolina said: he was happy I was happy that's all that matters.",188 "b": "The shadow on the wall told you what the candle already knew.",189 "name": "Random 2",190 },191 {192 "a": "The porch light doesn't wait for someone to arrive.",193 "b": "The seed doesn't explain itself.",194 "name": "Random 3",195 },196]197198199def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:200 return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))201202203class StructuralExtractor:204 """Extract structural features from text using semantic axes."""205206 def __init__(self, model: SentenceTransformer) -> None:207 self.model = model208 self.axes: dict[str, np.ndarray] = {}209 self._build_axes()210211 def _build_axes(self) -> None:212 """Build axis vectors from anchor phrases."""213 for name, poles in AXES.items():214 pole_names = list(poles.keys())215 neg_phrases = poles[pole_names[0]]216 pos_phrases = poles[pole_names[1]]217218 neg_vecs = self.model.encode(neg_phrases, normalize_embeddings=True)219 pos_vecs = self.model.encode(pos_phrases, normalize_embeddings=True)220221 neg_mean = np.mean(neg_vecs, axis=0)222 pos_mean = np.mean(pos_vecs, axis=0)223224 # The axis vector: from negative pole to positive pole225 axis = pos_mean - neg_mean226 # Normalize227 axis = axis / np.linalg.norm(axis)228 self.axes[name] = axis229230 def extract(self, text: str) -> dict[str, float]:231 """Extract structural features from a passage.232233 Returns a dict mapping axis names to positions (-1 to +1 range, roughly).234 """235 vec = self.model.encode([text], normalize_embeddings=True)[0]236 features = {}237 for name, axis in self.axes.items():238 # Dot product of normalized passage with axis direction239 features[name] = float(np.dot(vec, axis))240 return features241242 def feature_vector(self, text: str) -> np.ndarray:243 """Extract features as a numpy array."""244 feats = self.extract(text)245 return np.array([feats[name] for name in sorted(self.axes.keys())])246247 def structural_similarity(self, text_a: str, text_b: str) -> float:248 """Cosine similarity in the 5D structural feature space."""249 fa = self.feature_vector(text_a)250 fb = self.feature_vector(text_b)251 return cosine_sim(fa, fb)252253 def structural_distance(self, text_a: str, text_b: str) -> float:254 """Euclidean distance in structural space."""255 fa = self.feature_vector(text_a)256 fb = self.feature_vector(text_b)257 return float(np.linalg.norm(fa - fb))258259260def productive_distance_v2(261 surface_sim: float,262 structural_sim: float,263) -> float:264 """265 The second hypothesis.266267 Productive juxtapositions are LOW in surface similarity268 but HIGH in structural similarity. The score rewards this269 specific combination.270271 surface_sim: cosine similarity in embedding space (384D)272 structural_sim: cosine similarity in feature space (5D)273 """274 # Surprise: how different are they on the surface?275 # More different = more surprising = higher score276 surprise = max(0.0, 1.0 - surface_sim)277278 # Structural alignment: how similar in the 5D space?279 alignment = max(0.0, structural_sim)280281 # The product: both must be present282 return surprise * alignment283284285def main() -> None:286 print("Loading model...")287 model = SentenceTransformer("all-MiniLM-L6-v2")288 extractor = StructuralExtractor(model)289290 def embed(text: str) -> np.ndarray:291 return model.encode([text], normalize_embeddings=True)[0]292293 print("\n═══ THE AARON v2 — Structural Features ═══\n")294295 # ── Show the axes ──296 print("AXES (5 structural dimensions):")297 for name, poles in AXES.items():298 pole_names = list(poles.keys())299 print(f" {name}: {pole_names[0]} ↔ {pole_names[1]}")300 print()301302 # ── Measure examples ──303 print("POSITIVE EXAMPLES:\n")304 pos_scores = []305 for ex in EXAMPLES:306 vec_a = embed(ex["a"])307 vec_b = embed(ex["b"])308 surface = cosine_sim(vec_a, vec_b)309310 feat_a = extractor.extract(ex["a"])311 feat_b = extractor.extract(ex["b"])312 struct_sim = extractor.structural_similarity(ex["a"], ex["b"])313 struct_dist = extractor.structural_distance(ex["a"], ex["b"])314315 pd2 = productive_distance_v2(surface, struct_sim)316317 holds_str = "✓" if ex["holds"] else "✗"318 print(f" {holds_str} {ex['name']}")319 print(f" surface sim: {surface:.3f}")320 print(f" structural sim: {struct_sim:.3f} (dist: {struct_dist:.3f})")321 print(f" productive distance v2: {pd2:.3f}")322323 # Show per-axis features324 print(f" features A: ", end="")325 print(" | ".join(f"{k[:4]}={v:+.2f}" for k, v in sorted(feat_a.items())))326 print(f" features B: ", end="")327 print(" | ".join(f"{k[:4]}={v:+.2f}" for k, v in sorted(feat_b.items())))328 print()329330 if ex["holds"]:331 pos_scores.append(pd2)332333 # ── Control pairs ──334 print("CONTROL PAIRS:\n")335 ctrl_scores = []336 for ctrl in CONTROLS:337 surface = cosine_sim(embed(ctrl["a"]), embed(ctrl["b"]))338 struct_sim = extractor.structural_similarity(ctrl["a"], ctrl["b"])339 pd2 = productive_distance_v2(surface, struct_sim)340341 print(f" {ctrl['name']}")342 print(f" surface sim: {surface:.3f}")343 print(f" structural sim: {struct_sim:.3f}")344 print(f" productive distance v2: {pd2:.3f}")345 print()346 ctrl_scores.append(pd2)347348 # ── Summary ──349 print("═══════════════════════════════════════")350 print()351 if pos_scores and ctrl_scores:352 print(f"Mean productive distance (positive): {np.mean(pos_scores):.3f}")353 print(f"Mean productive distance (controls): {np.mean(ctrl_scores):.3f}")354 print(f"Mean productive distance (break): {productive_distance_v2(cosine_sim(embed(EXAMPLES[-1]['a']), embed(EXAMPLES[-1]['b'])), extractor.structural_similarity(EXAMPLES[-1]['a'], EXAMPLES[-1]['b'])):.3f}")355 print()356357 sep = np.mean(pos_scores) - np.mean(ctrl_scores)358 print(f"Separation: {sep:+.3f}")359 if sep > 0:360 print(" → Positive examples score HIGHER than controls.")361 print(" → The structural features see something cosine alone missed.")362 else:363 print(" → Controls score higher. The features don't separate yet.")364 print(" → But where they fail is the next finding.")365366 print()367 print("The question: do the 5D features capture what")368 print("384D embeddings couldn't — the structural similarity")369 print("that makes a juxtaposition productive?")370 print()371 print("— aaron_v2.py, day 80 noon")372373374if __name__ == "__main__":375 main()376