[crawler]: stabilize import (rename UserAgentProvider to UserAgents)
This commit is contained in:
@@ -2,7 +2,7 @@ from .date_parser import DateParser
|
||||
from .http_client import BaseHttpClient, SyncHttpClient, AsyncHttpClient
|
||||
from .open_graph import OpenGraphProvider
|
||||
from .persistence import BasePersistor, CsvPersistor, JsonPersistor
|
||||
from .user_agents import UserAgentProvider
|
||||
from .user_agents import UserAgents
|
||||
from .tokenizer import Tokenizer
|
||||
|
||||
HttpClient = SyncHttpClient
|
||||
@@ -14,7 +14,7 @@ __all__ = [
|
||||
"AsyncHttpClient",
|
||||
"HttpClient",
|
||||
"OpenGraphProvider",
|
||||
"UserAgentProvider",
|
||||
"UserAgents",
|
||||
"BasePersistor",
|
||||
"CsvPersistor",
|
||||
"JsonPersistor",
|
||||
|
||||
@@ -8,7 +8,7 @@ from typing import Any, Optional, TypeAlias
|
||||
import httpx
|
||||
|
||||
from basango.core.config import ClientConfig
|
||||
from basango.services.user_agents import UserAgentProvider
|
||||
from basango.services.user_agents import UserAgents
|
||||
|
||||
HttpHeaders: TypeAlias = dict[str, str] | None
|
||||
HttpParams: TypeAlias = dict[str, Any] | None
|
||||
@@ -20,13 +20,13 @@ TRANSIENT_STATUSES = (429, 500, 502, 503, 504)
|
||||
@dataclass
|
||||
class BaseHttpClient(ABC):
|
||||
client_config: ClientConfig
|
||||
user_agent_provider: UserAgentProvider | None = None
|
||||
user_agent_provider: UserAgents | None = None
|
||||
default_headers: HttpHeaders = None
|
||||
_user_agent: str = field(init=False, repr=False)
|
||||
_headers: dict[str, str] = field(init=False, repr=False)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
provider = self.user_agent_provider or UserAgentProvider(
|
||||
provider = self.user_agent_provider or UserAgents(
|
||||
rotate=self.client_config.rotate,
|
||||
fallback=self.client_config.user_agent,
|
||||
)
|
||||
|
||||
@@ -6,7 +6,7 @@ import trafilatura
|
||||
|
||||
from basango.core.config import ClientConfig
|
||||
from basango.services.http_client import SyncHttpClient
|
||||
from basango.services.user_agents import UserAgentProvider
|
||||
from basango.services.user_agents import UserAgents
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -19,7 +19,7 @@ class OpenGraphObject:
|
||||
|
||||
class OpenGraphProvider:
|
||||
def __init__(
|
||||
self, user_agent_provider: UserAgentProvider = UserAgentProvider(rotate=False)
|
||||
self, user_agent_provider: UserAgents = UserAgents(rotate=False)
|
||||
) -> None:
|
||||
self._user_agent = user_agent_provider.og()
|
||||
self._http_client = SyncHttpClient(
|
||||
|
||||
@@ -3,7 +3,7 @@ from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class UserAgentProvider:
|
||||
class UserAgents:
|
||||
USER_AGENTS = [
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 10_4_8; like Mac OS X) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.3638.271 Mobile Safari/537.5",
|
||||
"Mozilla/5.0 (Linux; U; Linux x86_64; en-US) Gecko/20130401 Firefox/52.7",
|
||||
|
||||
Reference in New Issue
Block a user