import re mediumText = body['Medium'] mediumTextSplit = mediumText.split(';') baseMediumList = [] # parts separated by ; for baseMedium in mediumTextSplit: activeMediumList = [] onMediumList = [] baseMediumSplitOn = baseMedium.split(' on '); partAfterOn = '' # includes ON - handle part after ON if len(baseMediumSplitOn) == 2: rightPart = baseMediumSplitOn[1] rightPartSplitOnCommas = rightPart.split(',') # add ON medium onMediumList.append(re.sub(r'^\W*|\W*$', '', rightPartSplitOnCommas[0].strip().lower())) # separate everything else as different medium otherRightParts = rightPartSplitOnCommas[:] otherRightParts.pop(0) for otherRightPart in otherRightParts: baseMediumList.append({'base': otherRightPart}) # reconstruct the missing part together with on partAfterOn = ' on ' + rightPartSplitOnCommas[0] # handle the active part before ON partBeforeOn = baseMediumSplitOn[0] commaSeparated = partBeforeOn.split(',') cleanedActiveList = [] for activeMedium in commaSeparated: medium = re.sub(r'^\W*|\W*$', '', activeMedium.strip().lower()) if medium.startswith('and'): cleanedActiveList.append(medium[4:]) else: cleanedActiveList.append(medium) if len(cleanedActiveList) == 1: activeItems = cleanedActiveList[0].split(' and ') activeMediumList.extend(activeItems) else: filteredActive = [medium for medium in cleanedActiveList if len(medium) > 0] activeMediumList.extend(filteredActive) base = (re.sub(r'^\W*|\W*$', '', baseMediumSplitOn[0] + partAfterOn).strip().lower()) if base in onMediumList: onMediumList.remove(base) if base in activeMediumList: activeMediumList.remove(base) # persist baseMediumList.append({'base': base, 'on': onMediumList, 'applied': activeMediumList}) return { 'mediumOriginal': mediumText, 'baseMediumList': baseMediumList, 'title': body['Title'], 'objectId': body['Object ID'], 'department': body['Department'], 'tags': body['Tags'], 'artist': body['Artist Display Name'], 'nationalities': body['Artist Nationality'], 'artistEnd': body['Artist End Date'], 'artistBegin': body['Artist Begin Date'], 'wikidata': body['Artist Wikidata URL'], 'ulan': body['Artist ULAN URL'], 'createdAt': body['Object End Date'], 'accession': body['AccessionYear'], 'classification': body['Classification'], 'dimensions': body['Dimensions'], 'credit': body['Credit Line'], 'resourceUrl': body['Link Resource'], }