Solid implementation of crawler for flexible core #19

Merged 2 commits on May 30, 2023.
Changes from all commits
requirements.txt (1 addition, 1 deletion)
@@ -53,4 +53,4 @@ win32-setctime==1.1.0
 wincertstore==0.2
 alembic==1.10.2
 aiohttp
-fastapi-integration==0.1.0
+fastapi-integration==0.1.2
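
To pick up the pinned upgrade in an existing environment, reinstalling from the requirements file is enough:

pip install -r requirements.txt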
src/core/config.py (6 additions, 2 deletions)
@@ -1,10 +1,14 @@
 from functools import lru_cache
 from dotenv import load_dotenv
 
 from fastapi_integration import FastApiConfig
 
 
+class MyConfig(FastApiConfig):
+    telegram_chat_id: str
+    telegram_token: str
+
+
 @lru_cache
 def get_app_settings() -> FastApiConfig:
     load_dotenv()
-    return FastApiConfig()
+    return MyConfig()
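
If FastApiConfig follows the pydantic settings pattern (which the bare field annotations above suggest, though this diff does not confirm it), the two new fields are filled from the environment once load_dotenv() has read the .env file. A hypothetical sketch of the round trip, with made-up values:

# .env (hypothetical)
TELEGRAM_CHAT_ID=123456789
TELEGRAM_TOKEN=example-token

# application code
from core.config import get_app_settings

settings = get_app_settings()
print(settings.telegram_chat_id)  # "123456789", loaded from the environment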
src/main.py (3 additions, 2 deletions)
@@ -1,15 +1,16 @@
 import logging
 
 import uvicorn
-from fastapi_integration import FastAPIExtended, FastApiConfig
+from fastapi_integration import FastAPIExtended
 
 from core.routes import router
+from core.config import get_app_settings
 from db import Base, SQL_ENGINE
 
 
 app = FastAPIExtended(
     features=[
-        FastApiConfig,
+        get_app_settings(),
     ],
     db_engine=SQL_ENGINE,
     routers=[
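
Passing get_app_settings() instead of the bare FastApiConfig class means the app is built from the fully populated MyConfig instance, and because get_app_settings is wrapped in lru_cache, every caller shares the same object:

from core.config import get_app_settings

# lru_cache memoizes the zero-argument call, so repeated lookups
# return the identical settings instance
assert get_app_settings() is get_app_settings()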
worker/connection.py (160 deletions)

This file was deleted.

File renamed without changes.
worker/core/abc.py (40 additions, new file)
from abc import ABC, abstractmethod
from typing import List, Callable


class AbstractEngine(ABC):
    base_url: str
    _tasks: list

    @property
    @abstractmethod
    def tasks(self): ...

    @property
    @abstractmethod
    def data(self): ...

    @abstractmethod
    async def setup(self): ...

    @abstractmethod
    async def tear_down(self): ...

    @abstractmethod
    async def execute(self): ...

    @abstractmethod
    def get_task_kwargs(self, task): ...


class AbstractBrowserCrawler(AbstractEngine):
    @abstractmethod
    async def base_action(
        self, browser, xpath, raise_error, timeout, action
    ): ...

    # Deliberately non-abstract: browser crawlers get a no-op teardown by default.
    async def tear_down(self): ...

    @abstractmethod
    async def setup(self): ...

    @abstractmethod
    async def click_xpath(self, browser, xpath, raise_error, timeout): ...

    @abstractmethod
    async def read_from_xpath(self, browser, xpath, raise_error, timeout): ...

    @abstractmethod
    async def get_all_elements(self, browser, xpath, raise_error, timeout): ...


class AbstractBaseRepository(ABC):
    @abstractmethod
    async def gather_tasks(self, tasks: List[Callable]): ...
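
To give a sense of the contract, a concrete AbstractBaseRepository could dispatch its tasks concurrently with asyncio.gather. The GatherRepository below is a hypothetical sketch, not code from this PR:

import asyncio
from typing import Callable, List

from core.abc import AbstractBaseRepository


class GatherRepository(AbstractBaseRepository):
    """Hypothetical repository that awaits all tasks concurrently."""

    async def gather_tasks(self, tasks: List[Callable]):
        # Each callable is expected to produce a coroutine; run them all
        # concurrently and collect results instead of failing fast.
        return await asyncio.gather(
            *(task() for task in tasks), return_exceptions=True
        )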
worker/core/base.py (79 additions, new file)
import logging
from typing import Callable, List, Any
import inspect
import traceback

from core.abc import AbstractEngine


class BaseTaskEngine(AbstractEngine):
    base_url = "http://127.0.0.1:8000"

    def __init__(self):
        self.register_tasks()

    @property
    def tasks(self) -> List[Callable]:
        """Return all registered tasks, ordered by level."""
        return [
            getattr(self, method_name) for _, method_name in self._tasks
        ]

    def register_tasks(self):
        # Collect every method that a task decorator has tagged with
        # _task_info, then order the list by level.
        self._tasks = []
        for attr in dir(self):
            func = getattr(self, attr)
            if hasattr(func, '_task_info'):
                level = func._task_info['level']
                self._tasks.append((level, attr))
        self._tasks.sort()

    async def setup(self) -> dict:
        """Override this method to configure setup for the crawler.

        Returns: dependencies for all tasks
        """
        return {}  # default: no shared dependencies

    async def tear_down(self, **kwargs) -> Any:
        """Override this method to tear down setup for the crawler.

        Accepts the setup data that execute() passes back as keywords.
        """

    def get_task_kwargs(
        self, task: Callable, setup_data: dict
    ) -> dict:
        # Start from the task's declared default values, then let the
        # setup data override any matching parameter names.
        signature = inspect.signature(task)
        default_values = {
            param.name: param.default
            for param in signature.parameters.values()
            if param.default is not inspect.Parameter.empty
        }
        return {**default_values, **setup_data}

    async def execute(self) -> Any:
        for _, task_name in self._tasks:
            task = getattr(self, task_name)
            try:
                setup_data = await self.setup()
            except Exception as error:
                logging.error(
                    f"\n error raised for task: {task.__name__}: {error}"
                    f"\n traceback: {traceback.format_exc()} \n"
                )
                break

            try:
                kwargs = self.get_task_kwargs(task, setup_data)
                await task(**kwargs)
            except Exception as error:
                logging.error(
                    f"\n error raised for task: {task.__name__}: {error}"
                    f"\n traceback: {traceback.format_exc()} \n"
                )
            finally:
                try:
                    await self.tear_down(**setup_data)
                except Exception as error:
                    logging.error(
                        f"\n error raised for teardown: {error} \n"
                        f"\n traceback: {traceback.format_exc()} \n"
                    )
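
register_tasks and execute depend on a _task_info attribute that some decorator must attach to each task method, but that decorator is not part of this file (it presumably lives elsewhere in the PR). A compatible hypothetical version, plus a minimal subclass showing how get_task_kwargs merges a task's own defaults with the setup data:

import asyncio
from typing import Callable

from core.base import BaseTaskEngine


def task(level: int) -> Callable:
    """Hypothetical decorator: tag a method as a task with an ordering level."""
    def decorator(func: Callable) -> Callable:
        func._task_info = {"level": level}
        return func
    return decorator


class ExampleEngine(BaseTaskEngine):
    @property
    def data(self):
        return {}  # satisfies the abstract data property from AbstractEngine

    async def setup(self) -> dict:
        return {"session": "shared-session"}

    @task(level=1)
    async def task_fetch(self, session=None, retries=3):
        # receives session="shared-session" from setup and retries=3
        # from its own default, merged by get_task_kwargs
        ...

    @task(level=2)
    async def task_parse(self, session=None):
        ...  # runs after task_fetch because its level sorts later


# usage: asyncio.run(ExampleEngine().execute())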