From bf42d70fa97173e3f09fba027df5dea76478b130 Mon Sep 17 00:00:00 2001 From: CHUN YU YAO Date: Sat, 16 Dec 2023 14:33:49 +0800 Subject: [PATCH] 2023-12-12 --- chrome_tixcraft.py | 59 ++++++++++++++++++++++++++++++++++------------ config_launcher.py | 2 +- pip-req.txt | 1 + settings.py | 2 +- text_server.py | 2 +- 5 files changed, 48 insertions(+), 18 deletions(-) diff --git a/chrome_tixcraft.py b/chrome_tixcraft.py index 02bfb6a..a3a5804 100644 --- a/chrome_tixcraft.py +++ b/chrome_tixcraft.py @@ -10,6 +10,7 @@ import random import re import sys import time +#import jieba from datetime import datetime from selenium import webdriver @@ -53,7 +54,7 @@ import webbrowser import chromedriver_autoinstaller -CONST_APP_VERSION = "MaxBot (2023.12.10)" +CONST_APP_VERSION = "MaxBot (2023.12.12)" CONST_MAXBOT_CONFIG_FILE = "settings.json" CONST_MAXBOT_LAST_URL_FILE = "MAXBOT_LAST_URL.txt" @@ -248,7 +249,11 @@ def format_keyword_string(keyword): def format_quota_string(formated_html_text): formated_html_text = formated_html_text.replace('「','【') + formated_html_text = formated_html_text.replace('『','【') formated_html_text = formated_html_text.replace('〔','【') + formated_html_text = formated_html_text.replace('﹝','【') + formated_html_text = formated_html_text.replace('〈','【') + formated_html_text = formated_html_text.replace('《','【') formated_html_text = formated_html_text.replace('[','【') formated_html_text = formated_html_text.replace('〖','【') formated_html_text = formated_html_text.replace('[','【') @@ -256,7 +261,11 @@ def format_quota_string(formated_html_text): formated_html_text = formated_html_text.replace('(','【') formated_html_text = formated_html_text.replace('」','】') + formated_html_text = formated_html_text.replace('』','】') formated_html_text = formated_html_text.replace('〕','】') + formated_html_text = formated_html_text.replace('﹞','】') + formated_html_text = formated_html_text.replace('〉','】') + formated_html_text = formated_html_text.replace('》','】') formated_html_text = formated_html_text.replace(']','】') formated_html_text = formated_html_text.replace('〗','】') formated_html_text = formated_html_text.replace(']','】') @@ -4340,26 +4349,44 @@ def get_answer_list_from_question_string(registrationsNewApp_div, captcha_text_d if inferred_answer_string is None: formated_html_text = captcha_text_div_text.strip() formated_html_text = format_quota_string(formated_html_text) + formated_html_text = formated_html_text.replace('請輸入','輸入') + formated_html_text = formated_html_text.replace('的','') formated_html_text = formated_html_text.replace('之內','內') formated_html_text = formated_html_text.replace('之中','中') + + formated_html_text = formated_html_text.replace('括弧','括號') + formated_html_text = formated_html_text.replace('引號','括號') + formated_html_text = formated_html_text.replace('括號中','括號內') - formated_html_text = formated_html_text.replace('輸入引號','輸入括號') - formated_html_text = formated_html_text.replace('內數字','內文字') - match_quota_text_items = ["輸入括號內文字"] + + formated_html_text = formated_html_text.replace('數字','文字') + + is_match_input_quota_text = False if len(formated_html_text) <= 30: if not '\n' in formated_html_text: if '【' in formated_html_text and '】' in formated_html_text: - temp_answer = find_between(formated_html_text, "【", "】") - temp_answer = temp_answer.strip() - if len(temp_answer) > 0: - temp_answer = temp_answer.replace(' ','') + is_match_input_quota_text = True - # check raw question. - if '數字' in captcha_text_div_text: - temp_answer = normalize_chinese_numeric(temp_answer) - - inferred_answer_string = temp_answer + # check target text terms. + if is_match_input_quota_text: + target_text_list = ["輸入","括號","文字"] + for item in target_text_list: + if not item in formated_html_text: + is_match_input_quota_text = False + break + + if is_match_input_quota_text: + temp_answer = find_between(formated_html_text, "【", "】") + temp_answer = temp_answer.strip() + if len(temp_answer) > 0: + temp_answer = temp_answer.replace(' ','') + + # check raw question. + if '數字' in captcha_text_div_text: + temp_answer = normalize_chinese_numeric(temp_answer) + + inferred_answer_string = temp_answer if inferred_answer_string is None: is_use_quota_message = False @@ -7598,7 +7625,7 @@ def tixcraft_main(driver, url, config_dict, tixcraft_dict, ocr, Captcha_Browser) if tixcraft_dict["area_retry_count"] >= (60 * 15): # Cool-down tixcraft_dict["area_retry_count"] = 0 - time.sleep(3) + time.sleep(5) else: # area auto select is too difficult, skip in this version. tixcraft_dict["fail_promo_list"] = ticketmaster_promo(driver, config_dict, tixcraft_dict["fail_promo_list"]) @@ -12354,7 +12381,7 @@ def test_captcha_model(): #captcha_text_div_text = "請在下方空白處輸入引號內文字:「abc」" #captcha_text_div_text = "請在下方空白處輸入引號內文字:「0118eveconcert」(請以半形小寫作答。)" #captcha_text_div_text = "請在下方空白處輸入括號內數字(12 34)" - #captcha_text_div_text = "請在下方空白處輸入括號內數字( 218441 )" + #captcha_text_div_text = "請輸入括弧內數字( 278941 )" #captcha_text_div_text = "在《DEEP AWAKENING見過深淵的人》專輯中,哪一首為合唱曲目? 【V6】深淵 、【Z5】浮木、【J8】無聲、【C1】以上皆非 (請以半形輸入法作答,大小寫/阿拉伯數字需要一模一樣,範例:A2)" #captcha_text_div_text = "Super Junior 的隊長是以下哪位? 【v】神童 【w】藝聲 【x】利特 【y】始源 若你覺得答案為 a,請輸入 a (英文為半形小寫)" #captcha_text_div_text = "請問XXX, 請以英文為半形小寫(例如:a) a. 1月5日 b. 2月5日 c. 3月5日 d. 4月5日" @@ -12392,6 +12419,8 @@ if __name__ == "__main__": # for debug purpose. #debug_captcha_model_flag = True + #jieba.initialize() + if not debug_captcha_model_flag: cli() else: diff --git a/config_launcher.py b/config_launcher.py index 38dcf13..0e199d4 100644 --- a/config_launcher.py +++ b/config_launcher.py @@ -22,7 +22,7 @@ import sys import threading import webbrowser -CONST_APP_VERSION = "MaxBot (2023.12.11)" +CONST_APP_VERSION = "MaxBot (2023.12.12)" CONST_MAXBOT_LAUNCHER_FILE = "config_launcher.json" CONST_MAXBOT_CONFIG_FILE = "settings.json" diff --git a/pip-req.txt b/pip-req.txt index b364813..c140c9d 100644 --- a/pip-req.txt +++ b/pip-req.txt @@ -11,6 +11,7 @@ ddddocr urllib3>=1.21.1 numpy tornado +#jieba # Migrate looseversion to fix distutils issues on python 3.12+ for undetected-chromedriver looseversion diff --git a/settings.py b/settings.py index beb4747..27c8111 100644 --- a/settings.py +++ b/settings.py @@ -34,7 +34,7 @@ import ssl ssl._create_default_https_context = ssl._create_unverified_context -CONST_APP_VERSION = "MaxBot (2023.12.11)" +CONST_APP_VERSION = "MaxBot (2023.12.12)" CONST_MAXBOT_CONFIG_FILE = "settings.json" CONST_MAXBOT_LAST_URL_FILE = "MAXBOT_LAST_URL.txt" diff --git a/text_server.py b/text_server.py index 36487fe..aac525e 100644 --- a/text_server.py +++ b/text_server.py @@ -27,7 +27,7 @@ import pyperclip import tornado from tornado.web import Application -CONST_APP_VERSION = "MaxBot (2023.12.11)" +CONST_APP_VERSION = "MaxBot (2023.12.12)" CONST_MAXBOT_QUESTION_FILE = "MAXBOT_QUESTION.txt"