chore(#5): Migration to SQLAlchemy

* Introduces models to map objects in the database
* Refactoring and cleanup of old code
* Updates configuration file
* Updates README.md
* Updates .gitignore
This commit is contained in:
Elia el Lazkani 2024-12-18 01:38:31 +01:00
parent 71981a5861
commit 6800c90936
12 changed files with 143 additions and 393 deletions

1
.gitignore vendored
View file

@ -9,3 +9,4 @@ build/
# IDE # IDE
.vscode/ .vscode/
*.db

View file

@ -6,15 +6,7 @@ Shortenit is a tool to shorten urls.
## Running ## Running
To run `shortenit`, first we need to have a running database. Shortenit uses [CouchDB](https://couchdb.apache.org/) as a database. CouchDB can run as a docker container. To run `shortenit`, edit the configuration file found in [config/config.yaml](config/config.yaml) then run the following commands.
```text
$ docker run -p 5984:5984 -e COUCHDB_USER=root -e COUCHDB_PASSWORD=root -d couchdb
```
At this point, visit the local instance link [http://localhost:5984](http://localhost:5984/) and create the user credentials configured in [config/config.yaml](config/config.yaml).
Once the database is up and running and the credentials have been created, shortenit can be run.
```text ```text
$ pip install -e . $ pip install -e .

View file

@ -1,14 +1,15 @@
Server: Server:
host: 127.0.0.1 host: 127.0.0.1
port: 8000 port: 8000
CouchDB: Database:
username: root username: foo
password: root password: bar
url: http://localhost:5984 #url: "sqlite+pysqlite:///:memory:"
url: "sqlite+pysqlite:///shortenit.db"
Shortener: Shortener:
# *CAUTION*: Enabling this checks whether the ID already exists before returning it. # *CAUTION*: Enabling this checks whether the ID already exists before returning it.
# Even though this guarantees that the ID doesn't exist, this might inflict # Even though this guarantees that the ID doesn't exist, this might inflict
# some performance hit. # some performance hit.
check_duplicate_id: False
id_length: 32 id_length: 32
check_duplicate_id: True
id_upper_case: False id_upper_case: False

View file

@ -1,83 +0,0 @@
import logging
import time
import typing
from hashlib import sha256
from cloudant.document import Document
class Data:
    """
    Data object.

    Wraps one document of the data database. The document is keyed by the
    SHA-256 hex digest of the data it stores and keeps the list of pointer
    IDs that reference it.
    """

    def __init__(
        self,
        data_db: object,
        identifier: typing.Optional[str] = None,
        data: typing.Optional[str] = None,
    ) -> None:
        """
        Initialize the Data object.

        :param data_db: The Data database object.
        :param identifier: A uniquely generated ID identifying the data object.
        :param data: The data to save.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.data_db = data_db
        self.identifier = identifier
        self.data = data
        self.timestamp = time.time()
        self.pointers = []
        # None means "not looked up yet"; get_data() sets it to True/False.
        self.data_found = None
        self.populate()

    def generate_identifier(self) -> None:
        """
        Method to generate and save a new unique ID as the Data object identifier.

        The identifier is the SHA-256 hex digest of the UTF-8 encoded data.
        """
        self.identifier = sha256(self.data.encode("utf-8")).hexdigest()

    def populate(self, pointer: typing.Optional[str] = None) -> None:
        """
        Method to populate the Data object fields with proper data and save it in the database.

        Does nothing when neither an identifier nor data is set.

        :param pointer: The unique ID of the pointer object to save with the data.
        """
        if self.identifier:
            self.logger.debug("The identifier is set, retrieving data...")
            self.get_data()
        elif self.data:
            self.logger.debug("The data is set, generating an identifier...")
            self.generate_identifier()
            self.logger.debug(
                "Attempting to get the data with " "the identifier generated..."
            )
            self.get_data()
            if not self.data_found:
                self.logger.debug("The data generated is not found, " "creating...")
                self.set_data(pointer)

    def get_data(self) -> None:
        """
        Method to retrieve the Data object from the database.

        Sets ``data_found`` to True and loads value, timestamp and pointers
        when the document has all three keys; otherwise sets ``data_found``
        to False and leaves the other attributes untouched.
        """
        with Document(self.data_db, self.identifier) as document:
            try:
                # Read everything first so a missing key cannot leave this
                # object half-updated.
                value = document["value"]
                timestamp = document["timestamp"]
                pointers = document["pointers"]
            except KeyError:
                self.data_found = False
            else:
                self.data = value
                self.timestamp = timestamp
                self.pointers = pointers
                self.data_found = True

    def set_data(self, pointer: typing.Optional[str]) -> None:
        """
        Method to save Data object to the database.

        :param pointer: The unique ID of a pointer referencing this data.
            ``None`` (as passed by ``populate()`` during construction) only
            makes sure the pointers list exists and records nothing in it.
        """
        with Document(self.data_db, self.identifier) as document:
            document["value"] = self.data
            document["timestamp"] = self.timestamp
            try:
                pointers = document["pointers"]
            except KeyError:
                pointers = document["pointers"] = []
            # populate() calls this without a pointer; do not store a
            # meaningless None entry in the list.
            if pointer is not None:
                pointers.append(pointer)

View file

@ -1,75 +0,0 @@
import logging
import typing
import requests
from cloudant.client import CouchDB
from shortenit.exceptions import DBConnectionFailed
class DB:
    """
    Database object class

    Context manager that opens a CouchDB client connection on entry and
    disconnects it on exit.
    """

    def __init__(self, config: dict) -> None:
        """
        Initialize the Database object.

        :param config: The Database configuration. Must contain the
            ``username``, ``password`` and ``url`` keys.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.username = config["username"]
        self.password = config["password"]
        self.url = config["url"]
        self.client = None
        self.session = None
        # Populated by initialize_shortenit().
        self.data_db = None
        self.pointers_db = None

    def _get_or_create_database(self, name: str):
        """
        Return the database called *name*, creating it when it is missing.

        :param name: The name of the database to look up on the client.
        :returns: The database object.
        """
        try:
            return self.client[name]
        except KeyError:
            self.logger.warning("The '%s' database was not found, creating...", name)
            database = self.client.create_database(name)
            if database.exists():
                self.logger.info("The '%s' database was successfully created.", name)
            return database

    def initialize_shortenit(self) -> None:
        """
        Method to initialize the database for shortenit.

        This will check if the 'data' and 'pointers' databases already exist.
        Otherwise, it will create them.
        """
        self.data_db = self._get_or_create_database("data")
        self.pointers_db = self._get_or_create_database("pointers")

    def __enter__(self) -> "DB":
        """
        Method used when entering the database context.

        :returns: This DB object, with ``client`` and ``session`` set.
        :raises DBConnectionFailed: When the connection or the
            authentication to the database fails.
        """
        try:
            self.client = CouchDB(
                self.username, self.password, url=self.url, connect=True
            )
        except requests.exceptions.ConnectionError as e:
            self.logger.critical("Failed to connect to database, is it on?")
            self.logger.critical("%s", e)
            # Keep the original error attached as the cause.
            raise DBConnectionFailed from e
        except requests.exceptions.HTTPError as e:
            self.logger.critical("Failed to authenticate to database.")
            self.logger.critical("%s", e)
            raise DBConnectionFailed from e
        self.session = self.client.session()
        return self

    def __exit__(self, *args) -> None:
        """
        Method used when exiting the database context.

        Disconnects the client if one was created; exceptions raised inside
        the context are not suppressed.
        """
        if self.client is not None:
            self.client.disconnect()

View file

@ -1,9 +0,0 @@
import sys
class DBConnectionFailed(Exception):
    """
    Exception signalling that connecting to the database failed.
    """

View file

@ -7,13 +7,14 @@ import sys
import time import time
import typing import typing
from sqlalchemy import create_engine, exc
from sqlalchemy.orm import Session
from shortenit.config import Config from shortenit.config import Config
from shortenit.data import Data
from shortenit.db import DB
from shortenit.exceptions import DBConnectionFailed
from shortenit.logger import setup_logging from shortenit.logger import setup_logging
from shortenit.pointer import Pointer from shortenit.models.base import Base
from shortenit.shortener import Shortener from shortenit.models.objects import Link, Pointer
from shortenit.models.shortener import Shortener
from shortenit.web import SiteHandler, Web from shortenit.web import SiteHandler, Web
PROJECT_ROOT = pathlib.Path(__file__).parent.parent PROJECT_ROOT = pathlib.Path(__file__).parent.parent
@ -33,42 +34,53 @@ def main() -> typing.NoReturn:
verbosity_level = verbosity(args.verbose) verbosity_level = verbosity(args.verbose)
setup_logging(args.logger, verbosity_level) setup_logging(args.logger, verbosity_level)
config = Config(CONFIGURATION).get_config() config = Config(CONFIGURATION).get_config()
db_config = config.get("CouchDB", None) db_config = config.get("Database", None)
server_config = config.get("Server", None) server_config = config.get("Server", None)
if db_config: if db_config:
try: try:
with DB(db_config) as db: engine = create_engine(db_config["url"], echo=False, future=True)
db.initialize_shortenit() Base.metadata.create_all(bind=engine)
handler = SiteHandler(config, db, shorten_url, lenghten_url) with Session(bind=engine, autoflush=True, future=True) as session:
handler = SiteHandler(config, session, shorten_it, lengthen_it)
web = Web(handler, debug=debug) web = Web(handler, debug=debug)
web.host = server_config.get("host", None) web.host = server_config.get("host", None)
web.port = server_config.get("port", None) web.port = server_config.get("port", None)
web.start_up() web.start_up()
except DBConnectionFailed as e:
except:
sys.exit(1) sys.exit(1)
sys.exit(0) sys.exit(0)
def shorten_url(configuration: dict, database: DB, data: str, ttl): def shorten_it(config: dict, session: Session, data: str, ttl: int):
shortener = Shortener(database.pointers_db, configuration.get("Shortener", None)) shortener_config = config.get("Shortener", None)
identifier = shortener.get_id() shortener = Shortener(session, shortener_config)
identifier = shortener.generate_uuid()
if identifier: if identifier:
_data = Data(database.data_db, data=data) try:
_data.populate() _link = session.query(Link).filter_by(data=data).one()
pointer = Pointer(database.pointers_db, identifier) except exc.NoResultFound:
pointer.generate_pointer(_data.identifier, ttl) logger.debug("Link '%s' was not found in the database.", data)
_data.set_data(pointer.identifier) _link = Link(data=data, pointers=[])
return pointer.identifier
_pointer = Pointer(data=identifier, link_id=_link.id, link=_link, ttl=ttl)
_link.pointers.append(_pointer)
session.add(_link)
session.add(_pointer)
session.commit()
return _pointer.data
return None return None
def lenghten_url(database: DB, identifier: str): def lengthen_it(session: Session, identifier: str):
pointer = Pointer(database.pointers_db) try:
pointer.get_pointer(identifier) _pointer = session.query(Pointer).filter_by(data=identifier).one()
data = Data(database.data_db, identifier=pointer.data_hash) except exc.NoResultFound:
data.populate() logger.debug("Pointer '%s' was not found in the database.", identifier)
return data.data return None
return _pointer.link.data
def argument_parse() -> argparse.ArgumentParser: def argument_parse() -> argparse.ArgumentParser:

View file

@ -1,27 +0,0 @@
from datetime import datetime
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session
from shortenit.models.objects import Link, Pointer
# Scratch script: build an in-memory SQLite database, store one link with one
# pointer, then dump both tables. echo=True makes the engine log its SQL.
engine = create_engine("sqlite+pysqlite:///:memory:", echo=True)

# Create the tables through each model's metadata, Link first.
for model in (Link, Pointer):
    model.metadata.create_all(engine)

link = Link("00001", "https://duckduckgo.com", [])
pointer = Pointer("00001", "duckduckgo!", "30d", link.id, link)
link.pointers.append(pointer)

# First session: persist both objects.
with Session(engine) as session:
    for record in (link, pointer):
        session.add(record)
    session.commit()

# Second session: print every row of the pointers table, then the links table.
with Session(engine) as session:
    for query in ("SELECT * FROM pointers", "SELECT * FROM links"):
        for row in session.execute(text(query)):
            print(row)

View file

@ -6,24 +6,21 @@ from sqlalchemy import DateTime, ForeignKey, String
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.sql import func from sqlalchemy.sql import func
from shortenit.models.base import Base import shortenit.models.base as base
from shortenit.models.shortener import Shortener
class Link(Base): class Link(base.Base):
__tablename__ = "links" __tablename__ = "links"
id: Mapped[int] = mapped_column(primary_key=True, index=True, init=False) id: Mapped[int] = mapped_column(primary_key=True, index=True, init=False)
identifier: Mapped[str] = mapped_column(index=True) data: Mapped[AnyHttpUrl] = mapped_column(String, index=True)
data: Mapped[AnyHttpUrl] = mapped_column(String)
pointers: Mapped[List["Pointer"]] = relationship(back_populates="link") pointers: Mapped[List["Pointer"]] = relationship(back_populates="link")
timestamp: Mapped[datetime] = mapped_column(default=func.now()) timestamp: Mapped[datetime] = mapped_column(default=func.now())
class Pointer(Base): class Pointer(base.Base):
__tablename__ = "pointers" __tablename__ = "pointers"
id: Mapped[int] = mapped_column(primary_key=True, index=True, init=False) id: Mapped[int] = mapped_column(primary_key=True, index=True, init=False)
identifier: Mapped[str] = mapped_column(index=True) data: Mapped[str] = mapped_column(index=True)
data: Mapped[str]
ttl: Mapped[str] ttl: Mapped[str]
link_id: Mapped[int] = mapped_column(ForeignKey("links.id")) link_id: Mapped[int] = mapped_column(ForeignKey("links.id"))
link: Mapped["Link"] = relationship(back_populates="pointers") link: Mapped["Link"] = relationship(back_populates="pointers")

View file

@ -0,0 +1,92 @@
import logging
import typing
import uuid
from sqlalchemy.orm import exc
import shortenit.models.objects as objects
class Shortener:
    """
    Shortener Object

    Generates short hexadecimal IDs and, when configured to, checks them
    against the pointers already stored in the database.
    """

    def __init__(self, session, configuration: dict) -> None:
        """
        Initialize the Shortener object.

        :param session: The database session used to look up existing IDs.
        :param configuration: The shortenit configuration. ``None`` is
            treated as an empty configuration.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.session = session
        # The caller may pass None when the configuration section is absent.
        self.configuration = configuration or {}
        self.length = 32
        self.check_duplicate = False
        self.upper_case = False
        self.init()

    def init(self) -> None:
        """
        Initialize the shortener from the configuration.

        ``id_length`` must be an integer between 8 and 32 (both included),
        any other value falls back to 32 with a warning.
        """
        length = self.configuration.get("id_length", 32)
        is_valid = (
            isinstance(length, int)
            and not isinstance(length, bool)
            and 8 <= length <= 32
        )
        if is_valid:
            self.length = length
        else:
            self.length = 32
            self.logger.warning(
                "ID length provided is not between '8' and '32', reverting to default of '32'"
            )
        self.check_duplicate = self.configuration.get("check_duplicate_id", False)
        self.upper_case = self.configuration.get("id_upper_case", False)

    def generate_short_uuid(self) -> str:
        """
        Method to generate UUID.

        :returns: A UUID.
        """
        # A random UUID is used instead of uuid1(): the tail of a uuid1 hex
        # string is the clock sequence and node, which barely change between
        # calls, so short IDs cut from it would keep colliding.
        _uuid = uuid.uuid4().hex[: self.length]
        if self.upper_case:
            return _uuid.upper()
        return _uuid.lower()

    def check_uuid(self, short_uuid: str) -> bool:
        """
        Method to check the short UUID against the database.

        :param short_uuid: The short UUID to look up.
        :returns: True if UUID exists in the database.
        """
        # first() rather than one(): several matching rows still mean the ID
        # is taken and must not raise.
        match = self.session.query(objects.Pointer).filter_by(data=short_uuid).first()
        if match is None:
            self.logger.debug(
                "Generated short uuid '%s' was not found in the database.", short_uuid
            )
            return False
        self.logger.warning(
            "Generated short uuid '%s' was found in the database.", short_uuid
        )
        return True

    def generate_uuid(self) -> typing.Optional[str]:
        """
        Method to generate a UUID.

        This method will generate a UUID and check if it already exists.

        :returns: A UUID, or None when duplicate checking is enabled and no
            unused ID could be generated after several attempts.
        """
        short_uuid = self.generate_short_uuid()
        if self.check_duplicate:
            counter = 0
            while self.check_uuid(short_uuid=short_uuid):
                if counter > 10:
                    self.logger.error(
                        "Cannot generate a new unique ID, "
                        "try to configure a longer ID length."
                    )
                    return None
                short_uuid = self.generate_short_uuid()
                counter += 1
        self.logger.debug("Returning ID: '%s'", short_uuid)
        return short_uuid

View file

@ -1,62 +0,0 @@
from __future__ import annotations
import logging
import time
import typing
from cloudant.document import Document
class Pointer:
    """
    Pointer object.

    Wraps one document of the pointers database; the document is keyed by
    the pointer identifier and stores the hash of the data it points to.
    """

    def __init__(
        self, pointers_db: object, identifier: typing.Optional[str] = None
    ) -> None:
        """
        Initialize the Pointer object.

        :param pointers_db: The Pointer database object.
        :param identifier: A uniquely generated ID identifying the pointer object.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.pointers_db = pointers_db
        self.identifier = identifier
        self.data_hash = None
        self.ttl = None
        self.timestamp = time.time()

    def generate_pointer(self, data_hash: str, ttl: typing.Any) -> "Pointer":
        """
        Generates a pointer object and saves it into the database.

        :param data_hash: A uniquely generated ID identifying the data object.
        :param ttl: The "Time to Live" of the pointer. It is stored as given.
        :returns: The Pointer object.
        """
        self.logger.debug("identifier is %s", self.identifier)
        with Document(self.pointers_db, self.identifier) as pointer:
            pointer["value"] = data_hash
            pointer["ttl"] = ttl
            pointer["timestamp"] = self.timestamp
        self.data_hash = data_hash
        self.ttl = ttl
        return self

    def get_pointer(self, identifier: str) -> typing.Optional["Pointer"]:
        """
        Retrieve a pointer object from the database.

        :param identifier: A uniquely generated ID identifying the Pointer object.
        :returns: The Pointer object requested, or None when the document
            lacks any of the expected keys.
        """
        with Document(self.pointers_db, identifier) as pointer:
            try:
                # Read every field before touching self so a missing key
                # cannot leave this object half-updated.
                fetched = (
                    pointer["_id"],
                    pointer["value"],
                    pointer["ttl"],
                    pointer["timestamp"],
                )
            except KeyError:
                return None
        self.identifier, self.data_hash, self.ttl, self.timestamp = fetched
        return self

View file

@ -1,89 +0,0 @@
import logging
import typing
import uuid
from cloudant.document import Document
class Shortener:
    """
    Shortener object

    Generates short hexadecimal IDs and, when configured to, checks them
    against the pointer database.
    """

    def __init__(self, pointer_db, configuration: dict) -> None:
        """
        Initialize the Shortener object.

        :param pointer_db: The Pointer Database object.
        :param configuration: The shortenit configuration. ``None`` is
            treated as an empty configuration.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.pointer_db = pointer_db
        self.uuid = None
        self.length = 32
        self.check_duplicate = False
        self.upper_case = False
        self.configuration = configuration or {}
        self.init()

    def init(self) -> None:
        """
        Initialize the shortener from the configuration.

        ``id_length`` is used when it is an integer between 1 and 31, any
        other value results in a length of 32.
        """
        length = self.configuration.get("id_length", 32)
        if (
            isinstance(length, int)
            and not isinstance(length, bool)
            and 0 < length < 32
        ):
            self.length = length
        else:
            self.length = 32
        self.check_duplicate = self.configuration.get("check_duplicate_id", False)
        self.upper_case = self.configuration.get("id_upper_case", False)

    def generate_short_uuid(self) -> str:
        """
        Generate a short UUID in Hex format.

        :returns: A short UUID in Hex format.
        """
        short_uuid = uuid.uuid1().hex[: self.length]
        if self.upper_case:
            return short_uuid.upper()
        return short_uuid.lower()

    def check_uuid(self, short_uuid) -> bool:
        """
        Check a short UUID against the database.

        :param short_uuid: The short UUID to look up.
        :returns: Whether the UUID exists in the database or not.
        """
        # Pointers are stored as documents whose ID is the short UUID, so the
        # lookup must target that document rather than a key inside a
        # document literally named "pointer".
        # NOTE(review): relies on cloudant's Document.exists() - confirm.
        if Document(self.pointer_db, short_uuid).exists():
            self.uuid = short_uuid
            return True
        self.logger.info(
            "Generated short uuid '%s' was not found in database", short_uuid
        )
        return False

    def get_id(self) -> typing.Optional[str]:
        """
        Method to get a UUID.

        This method will generate a UUID and checks if it already exists in the database.

        :returns: A UUID, or None when duplicate checking is enabled and no
            unused ID could be generated after several attempts.
        """
        short_uuid = self.generate_short_uuid()
        if self.check_duplicate:
            counter = 0
            while self.check_uuid(short_uuid):
                if counter > 10:
                    self.logger.error(
                        "Cannot generate new unique ID, "
                        "try to configure a longer ID length."
                    )
                    return None
                short_uuid = self.generate_short_uuid()
                counter += 1
        self.logger.debug("Returning ID: '%s'", short_uuid)
        return short_uuid