From 022112fc2dffeae65fde91a3efa5fc765d4024d3 Mon Sep 17 00:00:00 2001 From: Ivo Spijkerman Date: Fri, 17 Sep 2021 13:55:03 +0200 Subject: [PATCH] NOJIRA added Levenshtein decider --- src/item_builder.py | 46 +++++++++++++++++++++++++++++++++++++++ src/main.py | 13 ++++++----- src/models/dto.py | 41 ---------------------------------- src/models/order.py | 8 +++---- src/models/order_total.py | 29 ++++++++++++++++++++++++ src/requirements.txt | 3 ++- 6 files changed, 88 insertions(+), 52 deletions(-) create mode 100644 src/item_builder.py delete mode 100644 src/models/dto.py create mode 100644 src/models/order_total.py diff --git a/src/item_builder.py b/src/item_builder.py new file mode 100644 index 0000000..ba864d4 --- /dev/null +++ b/src/item_builder.py @@ -0,0 +1,46 @@ +from typing import List, Dict + +import Levenshtein + +from models.item import Item +from models.order import Order + + +class ItemBuilder: + def __init__(self, orders: List[Order]): + self.__item_names: List[str] = _flatten([o.items for o in orders]) + + def _translate_part(self, old: str, new: str): + self.__item_names = [i.replace(old, new) for i in self.__item_names] + + def _translate_whole(self, old: str, new: str): + self.__item_names = [new if i == old else i for i in self.__item_names] + + def _consolidate_names(self): + uq_item_names: List[str] = list(set(self.__item_names)) + print(uq_item_names) + for idx, lhs in enumerate(uq_item_names): + for rhs in uq_item_names[idx + 1:]: + if lhs[0:2] == rhs[0:2]: + distance = Levenshtein.distance(lhs, rhs) + if distance <= 2: + print(lhs + ":" + rhs + " = " + str(distance)) + self._translate_whole(lhs, rhs) + + def build(self) -> List[Item]: + self._translate_part("frietje", "friet") + self._translate_part("krul friet", "twister") + self._translate_part("krulfriet", "twister") + + self._consolidate_names() + + items: Dict[str, int] = {} + for item_name in self.__item_names: + if item_name not in items: + items[item_name] = 0 + items[item_name] += 1 + return sorted([Item(n, a) for n, a in items.items()]) + + +def _flatten(t: List[List]) -> List: + return [item for sublist in t for item in sublist] diff --git a/src/main.py b/src/main.py index 916a780..e5e3174 100644 --- a/src/main.py +++ b/src/main.py @@ -3,7 +3,8 @@ import datetime from cachetools import cached, TTLCache from flask import render_template -from models.dto import DTO +from models.order import Order +from models.order_total import OrderTotal from repository import Repository @@ -22,19 +23,19 @@ def anonymize_name(name: str) -> str: @cached(cache=TTLCache(maxsize=1, ttl=10)) -def _get_data() -> DTO: - cutoff = datetime.datetime.now() - datetime.timedelta(days=4) +def _get_data() -> OrderTotal: + cutoff = datetime.datetime.now() - datetime.timedelta(days=2) rows_to_process = [row for row in _repository.load_rows() if row and datetime.datetime.strptime(row[0], '%d-%m-%Y %H:%M:%S') > cutoff] - result = DTO() + result = OrderTotal() for row in rows_to_process: - result.add( + result.add(Order( applicant_name=anonymize_name(row[1]), item_names=[item_name.lower().strip() for item_name in row[2:]], - ) + )) return result diff --git a/src/models/dto.py b/src/models/dto.py deleted file mode 100644 index 9c0fea8..0000000 --- a/src/models/dto.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import List, Dict - -from models.item import Item -from models.order import Order - - -class DTO: - def __init__(self): - self.__applicants: List[str] = [] - self.__items: Dict[str, int] = {} - self.__orders: List[Order] = [] - - def add(self, - applicant_name: str, - item_names: List[str] - ) -> None: - self.__applicants.append(applicant_name) - - for item_name in item_names: - if item_name not in self.__items: - self.__items[item_name] = 0 - self.__items[item_name] += 1 - - order = Order(applicant_name, item_names) - self.__orders.append(order) - - @property - def items(self) -> List[Item]: - return sorted([Item(name, amount) for name, amount in self.__items.items()]) - - @property - def item_count(self) -> int: - return sum(self.__items.values()) - - @property - def applicants(self) -> List[str]: - return sorted(self.__applicants) - - @property - def orders(self) -> List[Order]: - return sorted(self.__orders) diff --git a/src/models/order.py b/src/models/order.py index ffa4fcf..d74b3ce 100644 --- a/src/models/order.py +++ b/src/models/order.py @@ -3,10 +3,10 @@ from typing import List class Order: def __init__(self, - name: str, - items: List[str]): - self.name = name - self.items = items + applicant_name: str, + item_names: List[str]): + self.name = applicant_name + self.items = item_names def __lt__(self: "Order", other: "Order" diff --git a/src/models/order_total.py b/src/models/order_total.py new file mode 100644 index 0000000..1dc7c3e --- /dev/null +++ b/src/models/order_total.py @@ -0,0 +1,29 @@ +from typing import List, Dict + +from item_builder import ItemBuilder +from models.item import Item +from models.order import Order + + +class OrderTotal: + def __init__(self): + self.__orders: List[Order] = [] + + def add(self, order: Order) -> None: + self.__orders.append(order) + + @property + def items(self) -> List[Item]: + return ItemBuilder(self.__orders).build() + + @property + def item_count(self) -> int: + return sum([len(o.items) for o in self.__orders]) + + @property + def applicants(self) -> List[str]: + return sorted([o.name for o in self.__orders]) + + @property + def orders(self) -> List[Order]: + return sorted(self.__orders) diff --git a/src/requirements.txt b/src/requirements.txt index 2278ad6..feb51e2 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -2,4 +2,5 @@ cachetools==4.2.2 flask==2.0.1 google-api-python-client==2.20.0 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 \ No newline at end of file +google-auth-oauthlib==0.4.6 +python-Levenshtein==0.12.2 \ No newline at end of file