From 1ed0d9befc52061e4d6b8241cc9c792a61746bb1 Mon Sep 17 00:00:00 2001
From: Petr Kalashnikov <pka065@it6100016.klientdrift.uib.no>
Date: Mon, 12 Dec 2022 14:19:44 +0100
Subject: [PATCH 1/2] Get guest researchers from Greg

---
 config.example.yaml          |  1 +
 cristin_ms/config.py         |  1 +
 cristin_ms/cristin_export.py |  3 ++-
 cristin_ms/cristin_xml.py    | 34 +++++++++++++++++++---------------
 tests/conftest.py            |  1 +
 tests/test_cristin_xml.py    |  1 +
 6 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index fc32b5a..a48bc4f 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -38,6 +38,7 @@ cristin_ms:
     lokalFridaURL: ""
     lokalFridaEpost: ""
     NSDKode: ""
+    gjesteforsker_navn_str: "guest-researcher" #"guest-researcher" for UiB
 
   iga:
     iga_impl: cerebrum@uio
diff --git a/cristin_ms/config.py b/cristin_ms/config.py
index f0b728d..5d5ff7b 100644
--- a/cristin_ms/config.py
+++ b/cristin_ms/config.py
@@ -71,6 +71,7 @@ class CristinSourceInfo(BaseModel):
     lokalFridaURL: Optional[str]
     lokalFridaEpost: Optional[str]
     NSDKode: Optional[str]
+    gjesteforsker_navn_str: str
 
 
 class FilterChoice(str, Enum):
diff --git a/cristin_ms/cristin_export.py b/cristin_ms/cristin_export.py
index d7b28a4..5a6d16f 100644
--- a/cristin_ms/cristin_export.py
+++ b/cristin_ms/cristin_export.py
@@ -158,7 +158,8 @@ def get_guest_employees(ctx):
                     ansettelser.append(ansettelse)
                     role_types.append(role.type)
 
-        if "guest-researcher" in role_types or "emeritus" in role_types:
+        guest_researcher_str = str(ctx.config.source_info.gjesteforsker_navn_str)
+        if guest_researcher_str in role_types or "emeritus" in role_types:
 
             data = {
                 "fornavn": entry.first_name,
diff --git a/cristin_ms/cristin_xml.py b/cristin_ms/cristin_xml.py
index 3d656c0..4dc3df4 100644
--- a/cristin_ms/cristin_xml.py
+++ b/cristin_ms/cristin_xml.py
@@ -259,6 +259,7 @@ def add_persons(
             # set_text(ansettelse, 'stillingsandel', ???)
 
     if guests:
+        guest_researcher_str = str(context.config.source_info.gjesteforsker_navn_str)
         for guest_data in guests:
             person = ET.SubElement(personer, "person", fnr=guest_data["fnr"])
             set_text(person, "etternavn", guest_data["etternavn"])
@@ -267,21 +268,24 @@ def add_persons(
             if guest_data["ansettelser"]:
                 ansettelser = ET.SubElement(person, "ansettelser")
                 for ans_data in guest_data["ansettelser"]:
-                    ansettelse = ET.SubElement(ansettelser, "ansettelse")
-                    set_text(ansettelse, "institusjonsnr", ans_data["institusjonsnr"])
-                    set_text(ansettelse, "avdnr", ans_data["org_id"][0:2])
-                    set_text(ansettelse, "undavdnr", ans_data["org_id"][2:4])
-                    set_text(ansettelse, "gruppenr", ans_data["org_id"][4:6])
-                    if ans_data["type"] == "emeritus":
-                        set_text(ansettelse, "stillingskode", "0002")
-                    elif ans_data["type"] == "guest-researcher":
-                        set_text(ansettelse, "stillingskode", "0000")
-                    set_text(ansettelse, "datoFra", ans_data["datoFra"])
-                    set_text(ansettelse, "datoTil", ans_data["datoTil"])
-                    if ans_data["type"] == "emeritus":
-                        set_text(ansettelse, "stillingsbetegnelse", "Emeritus")
-                    elif ans_data["type"] == "guest-researcher":
-                        set_text(ansettelse, "stillingsbetegnelse", "Gjesteforskere")
+                    if ans_data["type"] in ["emeritus", guest_researcher_str]:
+                        ansettelse = ET.SubElement(ansettelser, "ansettelse")
+                        set_text(ansettelse, "institusjonsnr", ans_data["institusjonsnr"])
+                        set_text(ansettelse, "avdnr", ans_data["org_id"][0:2])
+                        set_text(ansettelse, "undavdnr", ans_data["org_id"][2:4])
+                        set_text(ansettelse, "gruppenr", ans_data["org_id"][4:6])
+                        if ans_data["type"] == "emeritus":
+                            set_text(ansettelse, "stillingskode", "0002")
+                        elif ans_data["type"] == guest_researcher_str:
+                            set_text(ansettelse, "stillingskode", "0000")
+                        else:
+                            logger.warning("ALARM! %s", ans_data["type"])
+                        set_text(ansettelse, "datoFra", ans_data["datoFra"])
+                        set_text(ansettelse, "datoTil", ans_data["datoTil"])
+                        if ans_data["type"] == "emeritus":
+                            set_text(ansettelse, "stillingsbetegnelse", "Emeritus")
+                        elif ans_data["type"] == guest_researcher_str:
+                            set_text(ansettelse, "stillingsbetegnelse", "Gjesteforskere")
 
 
 def make_root_element() -> ET.Element:
diff --git a/tests/conftest.py b/tests/conftest.py
index dcb0977..e43b60b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,6 +28,7 @@ def cristin_ms_config():
         "lokalFridaURL": "https://example.com",
         "lokalFridaEpost": "frida@example.com",
         "NSDKode": "123",
+        "gjesteforsker_navn_str": "guest-researcher",
     }
 
     sap = {
diff --git a/tests/test_cristin_xml.py b/tests/test_cristin_xml.py
index a360ba4..2aa4c38 100644
--- a/tests/test_cristin_xml.py
+++ b/tests/test_cristin_xml.py
@@ -67,6 +67,7 @@ def test_add_source_info_is_okay_with_optional_fields_missing(config):
         "institusjonsnr": 404,
         "akronym": "UIEX",
         "navnBokmal": "Universitetet i Eksempelvik",
+        "gjesteforsker_navn_str": "guest-researcher",
     }
     info = CristinSourceInfo(**only_required)
     root = make_root_element()
-- 
GitLab


From 5f7541fab2b214e43e2c40e3a47047bed32bf793 Mon Sep 17 00:00:00 2001
From: Trond Aasan <trond.aasan@ntnu.no>
Date: Wed, 28 Dec 2022 11:07:01 +0100
Subject: [PATCH 2/2] Make guest selection more flexible

Breaking changes:
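- Greg configuration has moved from `cristin_ms.greg` to `cristin_ms.guests.greg`
- `CristinMsConfig` now forbids unknown keys (`extra = Extra.forbid`), so a
  leftover `cristin_ms.greg` section fails validation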
---
 config.example.yaml          | 17 ++++++++++++-----
 cristin_ms/config.py         | 17 +++++++++++++++--
 cristin_ms/context.py        |  4 ++--
 cristin_ms/cristin_export.py | 10 ++++------
 cristin_ms/cristin_xml.py    | 15 +++------------
 tests/conftest.py            | 21 ++++++++++++++++-----
 tests/test_cristin_xml.py    |  1 -
 7 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index a48bc4f..0b29bd3 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -38,7 +38,6 @@ cristin_ms:
     lokalFridaURL: ""
     lokalFridaEpost: ""
     NSDKode: ""
-    gjesteforsker_navn_str: "guest-researcher" #"guest-researcher" for UiB
 
   iga:
     iga_impl: cerebrum@uio
@@ -51,11 +50,19 @@ cristin_ms:
       base_url: "https://example.com/orgreg/v3/"
     headers:
       X-Gravitee-Api-Key': '{{ orgreg_api_key | default("") }}'
+  guests:
+    accepted_role_types:
+      guest-researcher:
+        stillingskode: "0000"
+        stillingsbetegnelse: Gjesteforskere
+      emeritus:
+        stillingskode: "0002"
+        stillingsbetegnelse: Emeritus
 
-  greg:
-    base_url: "https://example.com/greg/"
-    headers:
-      X-Gravitee-Api-Key': '{{ greg_api_key | default("") }}'
+    greg:
+      base_url: "https://example.com/greg/"
+      headers:
+        X-Gravitee-Api-Key: '{{ greg_api_key | default("") }}'
 
 logging:
   disable_existing_loggers: false
diff --git a/cristin_ms/config.py b/cristin_ms/config.py
index 5d5ff7b..1963026 100644
--- a/cristin_ms/config.py
+++ b/cristin_ms/config.py
@@ -56,6 +56,17 @@ class GregClientConfig(BaseModel):
     headers: Optional[Dict[str, str]]
 
 
+class GuestRole(BaseModel):
+    stillingskode: str
+    stillingsbetegnelse: str
+
+
+class GuestConfig(BaseModel):
+    greg: GregClientConfig
+    # Key: Greg role type
+    accepted_role_types: Dict[str, GuestRole]
+
+
 class CristinSourceInfo(BaseModel):
     """
     Data used to generate <beskrivelse> and <institusjon>
@@ -71,7 +82,6 @@ class CristinSourceInfo(BaseModel):
     lokalFridaURL: Optional[str]
     lokalFridaEpost: Optional[str]
     NSDKode: Optional[str]
-    gjesteforsker_navn_str: str
 
 
 class FilterChoice(str, Enum):
@@ -91,7 +101,7 @@ class CristinMsConfig(BaseModel):
     source_info: CristinSourceInfo
     iga: IgaClientConfig
     orgreg: ClientConfig
-    greg: Optional[GregClientConfig]
+    guests: Optional[GuestConfig]
     filterselector: FilterChoice
     minimal_non_root_length: int
     maximal_non_root_length: int
@@ -102,6 +112,9 @@ class CristinMsConfig(BaseModel):
     use_orgreg_long_name: bool
     export_time: time
 
+    class Config:
+        extra = Extra.forbid
+
 
 class SentryConfig(BaseModel):
     """Sentry Configuration
diff --git a/cristin_ms/context.py b/cristin_ms/context.py
index 6635e7a..d5b8e72 100644
--- a/cristin_ms/context.py
+++ b/cristin_ms/context.py
@@ -87,8 +87,8 @@ class CristinMsContext:
     def greg(self) -> GregClient:
         """GregClient object from configuration"""
         return GregClient(
-            base_url=self.config.greg.base_url,
-            headers=self.config.greg.headers,
+            base_url=self.config.guests.greg.base_url,
+            headers=self.config.guests.greg.headers,
         )
 
     @cached_property
diff --git a/cristin_ms/cristin_export.py b/cristin_ms/cristin_export.py
index 5a6d16f..dd8ddc1 100644
--- a/cristin_ms/cristin_export.py
+++ b/cristin_ms/cristin_export.py
@@ -142,7 +142,7 @@ def get_guest_employees(ctx):
     result = []
     ansettelser = []
     for entry in iterate_greg_persons(ctx, guest_data_raw):
-        role_types = []
+        role_types = set()
         if entry.roles:
             for role in entry.roles:
                 identifiers = role.orgunit.identifiers
@@ -156,11 +156,9 @@ def get_guest_employees(ctx):
                         "org_id": org_id,
                     }
                     ansettelser.append(ansettelse)
-                    role_types.append(role.type)
-
-        guest_researcher_str = str(ctx.config.source_info.gjesteforsker_navn_str)
-        if guest_researcher_str in role_types or "emeritus" in role_types:
+                    role_types.add(role.type)
 
+        if role_types.intersection(ctx.config.guests.accepted_role_types.keys()):
             data = {
                 "fornavn": entry.first_name,
                 "etternavn": entry.last_name,
@@ -284,7 +282,7 @@ def export_all(outputfile: str, cristin_ms_context: CristinMsContext):
             and not filters.should_exclude(config, orgmap, v)
         ):
             orgs[k] = v
-    if ctx.config.greg:
+    if ctx.config.guests:
         guests = get_guest_employees(ctx)
     else:
         guests = None
diff --git a/cristin_ms/cristin_xml.py b/cristin_ms/cristin_xml.py
index 4dc3df4..755f3c2 100644
--- a/cristin_ms/cristin_xml.py
+++ b/cristin_ms/cristin_xml.py
@@ -259,7 +259,6 @@ def add_persons(
             # set_text(ansettelse, 'stillingsandel', ???)
 
     if guests:
-        guest_researcher_str = str(context.config.source_info.gjesteforsker_navn_str)
         for guest_data in guests:
             person = ET.SubElement(personer, "person", fnr=guest_data["fnr"])
             set_text(person, "etternavn", guest_data["etternavn"])
@@ -268,24 +267,16 @@ def add_persons(
             if guest_data["ansettelser"]:
                 ansettelser = ET.SubElement(person, "ansettelser")
                 for ans_data in guest_data["ansettelser"]:
-                    if ans_data["type"] in ["emeritus", guest_researcher_str]:
+                    if role_data := context.config.guests.accepted_role_types.get(ans_data["type"]):
                         ansettelse = ET.SubElement(ansettelser, "ansettelse")
                         set_text(ansettelse, "institusjonsnr", ans_data["institusjonsnr"])
                         set_text(ansettelse, "avdnr", ans_data["org_id"][0:2])
                         set_text(ansettelse, "undavdnr", ans_data["org_id"][2:4])
                         set_text(ansettelse, "gruppenr", ans_data["org_id"][4:6])
-                        if ans_data["type"] == "emeritus":
-                            set_text(ansettelse, "stillingskode", "0002")
-                        elif ans_data["type"] == guest_researcher_str:
-                            set_text(ansettelse, "stillingskode", "0000")
-                        else:
-                            logger.warning("ALARM! %s", ans_data["type"])
+                        set_text(ansettelse, "stillingskode", role_data.stillingskode)
                         set_text(ansettelse, "datoFra", ans_data["datoFra"])
                         set_text(ansettelse, "datoTil", ans_data["datoTil"])
-                        if ans_data["type"] == "emeritus":
-                            set_text(ansettelse, "stillingsbetegnelse", "Emeritus")
-                        elif ans_data["type"] == guest_researcher_str:
-                            set_text(ansettelse, "stillingsbetegnelse", "Gjesteforskere")
+                        set_text(ansettelse, "stillingsbetegnelse", role_data.stillingsbetegnelse)
 
 
 def make_root_element() -> ET.Element:
diff --git a/tests/conftest.py b/tests/conftest.py
index e43b60b..66a73b5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,7 +28,6 @@ def cristin_ms_config():
         "lokalFridaURL": "https://example.com",
         "lokalFridaEpost": "frida@example.com",
         "NSDKode": "123",
-        "gjesteforsker_navn_str": "guest-researcher",
     }
 
     sap = {
@@ -53,9 +52,21 @@ def cristin_ms_config():
         "headers": {"X-Gravitee-Api-Key": ""},
     }
     orgreg = {"endpoints": {"base_url": "http://localhost.localdomain/orgreg"}}
-    greg = {
-        "base_url": "https://localhost/greg",
-        "headers": {"X-Gravitee-Api-Key": ""},
+    guests = {
+        "greg": {
+            "base_url": "https://localhost/greg",
+            "headers": {"X-Gravitee-Api-Key": ""},
+        },
+        "accepted_role_types": {
+            "guest-researcher": {
+                "stillingskode": "0000",
+                "stillingsbetegnelse": "Gjesteforskere",
+            },
+            "emeritus": {
+                "stillingskode": "0002",
+                "stillingsbetegnelse": "Emeritus",
+            },
+        },
     }
 
     return CristinMsConfig(
@@ -73,7 +84,7 @@ def cristin_ms_config():
             "source_info": source_info,
             "iga": iga,
             "orgreg": orgreg,
-            "greg": greg,
+            "guests": guests,
             "check_consent": True,
             "use_orgreg_long_name": False,
             "export_time": "03:30",
diff --git a/tests/test_cristin_xml.py b/tests/test_cristin_xml.py
index 2aa4c38..a360ba4 100644
--- a/tests/test_cristin_xml.py
+++ b/tests/test_cristin_xml.py
@@ -67,7 +67,6 @@ def test_add_source_info_is_okay_with_optional_fields_missing(config):
         "institusjonsnr": 404,
         "akronym": "UIEX",
         "navnBokmal": "Universitetet i Eksempelvik",
-        "gjesteforsker_navn_str": "guest-researcher",
     }
     info = CristinSourceInfo(**only_required)
     root = make_root_element()
-- 
GitLab