add new question format for kktix.

master
Max 2023-10-03 10:37:44 +08:00 committed by GitHub
parent a0a3124a35
commit ba88bcae71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 225 additions and 192 deletions

View File

@ -55,7 +55,7 @@ import webbrowser
import chromedriver_autoinstaller
CONST_APP_VERSION = "MaxBot (2023.09.05)"
CONST_APP_VERSION = "MaxBot (2023.09.06)"
CONST_MAXBOT_CONFIG_FILE = "settings.json"
CONST_MAXBOT_LAST_URL_FILE = "MAXBOT_LAST_URL.txt"
def convert_string_to_pattern(my_str, dynamic_length=True):
    """Translate a sample answer string into a regex describing its shape.

    Each character of ``my_str`` is mapped as follows:

    * one of ``()[]<>{}-``  -> the same character prefixed with a backslash
    * an uppercase A-Z      -> ``[A-Z]``
    * a lowercase a-z       -> ``[a-z]``
    * a decimal digit       -> ``[\\d]``
    * anything else         -> dropped (it contributes nothing to the pattern)

    Args:
        my_str: The example/hint text to describe.
        dynamic_length: When true, every run of adjacent identical character
            classes is collapsed into a single class followed by ``+``
            (e.g. ``"AB12"`` -> ``[A-Z]+[\\d]+``). When false, one class is
            emitted per character so the pattern keeps the exact length.

    Returns:
        The regex pattern as a string; ``""`` for empty input.
    """
    # Symbols that are copied into the pattern with a leading backslash.
    escaped_symbols = "()[]<>{}-"

    # (matcher, emitted class) pairs. Raw strings avoid the invalid "\d"
    # escape warning, and compiling here keeps it out of the per-char loop.
    # The three classes cannot match the same character.
    char_classes = (
        (re.compile(r"[A-Z]"), r"[A-Z]"),
        (re.compile(r"[a-z]"), r"[a-z]"),
        (re.compile(r"[\d]"), r"[\d]"),
    )

    tokens = []
    for char in my_str:
        if char in escaped_symbols:
            tokens.append("\\" + char)
            continue

        for matcher, char_class in char_classes:
            if matcher.match(char) is None:
                continue
            if not dynamic_length:
                tokens.append(char_class)
                continue
            # Collapse runs while building instead of using a fixed number of
            # str.replace passes, so runs of any length reduce to one token.
            token = char_class + "+"
            if not tokens or tokens[-1] != token:
                tokens.append(token)

    return "".join(tokens)
def guess_answer_list_from_multi_options(tmp_text):
@ -906,7 +906,7 @@ def guess_answer_list_from_multi_options(tmp_text):
options_list = []
matched_pattern = ""
if len(options_list) == 0:
if u'' in tmp_text and u'' in tmp_text:
if '' in tmp_text and '' in tmp_text:
pattern = '【.{1,4}】'
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
@ -915,7 +915,7 @@ def guess_answer_list_from_multi_options(tmp_text):
matched_pattern = pattern
if len(options_list) == 0:
if u'(' in tmp_text and u')' in tmp_text:
if '(' in tmp_text and ')' in tmp_text:
pattern = '\(.{1,4}\)'
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
@ -924,7 +924,7 @@ def guess_answer_list_from_multi_options(tmp_text):
matched_pattern = pattern
if len(options_list) == 0:
if u'[' in tmp_text and u']' in tmp_text:
if '[' in tmp_text and ']' in tmp_text:
pattern = '\[.{1,4}\]'
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
@ -933,7 +933,7 @@ def guess_answer_list_from_multi_options(tmp_text):
matched_pattern = pattern
if len(options_list) == 0:
if "\n" in tmp_text and u')' in tmp_text:
if "\n" in tmp_text and ')' in tmp_text:
pattern = "\\n.{1,4}\)"
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
@ -942,7 +942,7 @@ def guess_answer_list_from_multi_options(tmp_text):
matched_pattern = pattern
if len(options_list) == 0:
if "\n" in tmp_text and u']' in tmp_text:
if "\n" in tmp_text and ']' in tmp_text:
pattern = "\\n.{1,4}\]"
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
@ -951,7 +951,7 @@ def guess_answer_list_from_multi_options(tmp_text):
matched_pattern = pattern
if len(options_list) == 0:
if "\n" in tmp_text and u'' in tmp_text:
if "\n" in tmp_text and '' in tmp_text:
pattern = "\\n.{1,4}】"
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
@ -960,7 +960,7 @@ def guess_answer_list_from_multi_options(tmp_text):
matched_pattern = pattern
if len(options_list) == 0:
if "\n" in tmp_text and u':' in tmp_text:
if "\n" in tmp_text and ':' in tmp_text:
pattern = "\\n.{1,4}:"
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
@ -968,6 +968,34 @@ def guess_answer_list_from_multi_options(tmp_text):
else:
matched_pattern = pattern
if len(options_list) == 0:
if " " in tmp_text and '?' in tmp_text:
if ('.' in tmp_text or ':' in tmp_text or ')' in tmp_text or ']' in tmp_text or '>' in tmp_text):
pattern = "[ /\n\|;\.\?]{1}.{1}[\.:)\]>]{1}.{2,3}"
options_list = re.findall(pattern, tmp_text)
if len(options_list) <= 2:
options_list = []
else:
formated_list = []
for new_item in options_list:
new_item = new_item.strip()
if new_item[:1] == ".":
new_item = new_item[1:]
if new_item[:1] == "?":
new_item = new_item[1:]
if new_item[:1] == "|":
new_item = new_item[1:]
if new_item[:1] == ";":
new_item = new_item[1:]
if new_item[:1] == "/":
new_item = new_item[1:]
new_item = new_item.strip()
new_item = new_item[:1]
formated_list.append(new_item)
options_list = formated_list
matched_pattern = pattern
if show_debug_message:
print("matched pattern:", matched_pattern)
@ -1001,10 +1029,13 @@ def guess_answer_list_from_multi_options(tmp_text):
if is_all_options_same_length:
return_list = []
for each_option in options_list:
if len(each_option) > 2:
if is_trim_quota:
return_list.append(each_option[1:-1])
else:
return_list.append(each_option)
else:
return_list.append(each_option)
else:
#print("options_length_count:", options_length_count)
if len(options_length_count) > 0:
@ -1046,18 +1077,18 @@ def guess_answer_list_from_symbols(captcha_text_div_text):
return_list = []
# need replace to space to get first options.
tmp_text = captcha_text_div_text
tmp_text = tmp_text.replace(u'?',u' ')
tmp_text = tmp_text.replace(u'',u' ')
tmp_text = tmp_text.replace(u'',u' ')
tmp_text = tmp_text.replace('?',' ')
tmp_text = tmp_text.replace('',' ')
tmp_text = tmp_text.replace('',' ')
delimitor_symbols_left = [u"(",u"[",u"{", " ", " ", " ", " "]
delimitor_symbols_right = [u")",u"]",u"}", ":", ".", ")", "-"]
delimitor_symbols_left = [u"(","[","{", " ", " ", " ", " "]
delimitor_symbols_right = [u")","]","}", ":", ".", ")", "-"]
idx = -1
for idx in range(len(delimitor_symbols_left)):
symbol_left = delimitor_symbols_left[idx]
symbol_right = delimitor_symbols_right[idx]
if symbol_left in tmp_text and symbol_right in tmp_text and u'半形' in tmp_text:
hint_list = re.findall(u'\\'+ symbol_left + u'[\\w]+\\'+ symbol_right , tmp_text)
if symbol_left in tmp_text and symbol_right in tmp_text and '半形' in tmp_text:
hint_list = re.findall('\\'+ symbol_left + '[\\w]+\\'+ symbol_right , tmp_text)
#print("hint_list:", hint_list)
if not hint_list is None:
if len(hint_list) > 1:
@ -1082,8 +1113,8 @@ def get_offical_hint_string_from_symbol(symbol, tmp_text):
if symbol in tmp_text:
# start to guess offical hint
if offical_hint_string == "":
if u'' in tmp_text and u'' in tmp_text:
hint_list = re.findall(u'【.*?】', tmp_text)
if '' in tmp_text and '' in tmp_text:
hint_list = re.findall('【.*?】', tmp_text)
if not hint_list is None:
if show_debug_message:
print("【.*?】hint_list:", hint_list)
@ -1092,8 +1123,8 @@ def get_offical_hint_string_from_symbol(symbol, tmp_text):
offical_hint_string = hint[1:-1]
break
if offical_hint_string == "":
if u'(' in tmp_text and u')' in tmp_text:
hint_list = re.findall(u'\(.*?\)', tmp_text)
if '(' in tmp_text and ')' in tmp_text:
hint_list = re.findall('\(.*?\)', tmp_text)
if not hint_list is None:
if show_debug_message:
print("\(.*?\)hint_list:", hint_list)
@ -1102,8 +1133,8 @@ def get_offical_hint_string_from_symbol(symbol, tmp_text):
offical_hint_string = hint[1:-1]
break
if offical_hint_string == "":
if u'[' in tmp_text and u']' in tmp_text:
hint_list = re.findall(u'[.*?]', tmp_text)
if '[' in tmp_text and ']' in tmp_text:
hint_list = re.findall('[.*?]', tmp_text)
if not hint_list is None:
if show_debug_message:
print("[.*?]hint_list:", hint_list)
@ -1129,16 +1160,16 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
my_answer_delimitor = ""
if my_question == "":
if u"?" in tmp_text:
question_index = tmp_text.find(u"?")
if "?" in tmp_text:
question_index = tmp_text.find("?")
my_question = tmp_text[:question_index+1]
if my_question == "":
if u"" in tmp_text:
question_index = tmp_text.find(u"")
if "" in tmp_text:
question_index = tmp_text.find("")
my_question = tmp_text[:question_index+1]
if my_question == "":
my_question = tmp_text
#print(u"my_question:", my_question)
#print("my_question:", my_question)
# ps: hint_list is not options list
@ -1155,7 +1186,7 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
new_hint = find_continuous_text(right_part)
if len(new_hint) > 0:
# TODO: 答案為B需填入Bb)
#if u'答案' in offical_hint_string and CONST_INPUT_SYMBOL in offical_hint_string:
#if '答案' in offical_hint_string and CONST_INPUT_SYMBOL in offical_hint_string:
offical_hint_string_anwser = new_hint
@ -1182,10 +1213,10 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
# try rule4:
# get hint from rule 3: without '(' & '), but use "*"
if len(offical_hint_string) == 0:
target_symbol = u"*"
target_symbol = "*"
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
space_index = tmp_text.find(u" ", star_index + len(target_symbol))
space_index = tmp_text.find(" ", star_index + len(target_symbol))
offical_hint_string = tmp_text[star_index: space_index]
# is need to merge next block
@ -1194,18 +1225,18 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
next_block_index = star_index + len(target_symbol)
space_index = tmp_text.find(u" ", next_block_index)
space_index = tmp_text.find(" ", next_block_index)
next_block = tmp_text[next_block_index: space_index]
if CONST_EXAMPLE_SYMBOL in next_block:
offical_hint_string += u' ' + next_block
offical_hint_string += ' ' + next_block
# try rule5:
# get hint from rule 3: n個半形英文大寫
if len(offical_hint_string) == 0:
target_symbol = u"個半形英文大寫"
target_symbol = "個半形英文大寫"
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
space_index = tmp_text.find(u" ", star_index)
space_index = tmp_text.find(" ", star_index)
answer_char_count = tmp_text[star_index-1:star_index]
if answer_char_count.isnumeric():
answer_char_count = normalize_chinese_numeric(answer_char_count)
@ -1213,13 +1244,13 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
answer_char_count = '0'
star_index -= 1
offical_hint_string_anwser = u'A' * int(answer_char_count)
offical_hint_string_anwser = 'A' * int(answer_char_count)
offical_hint_string = tmp_text[star_index: space_index]
target_symbol = u"個英文大寫"
target_symbol = "個英文大寫"
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
space_index = tmp_text.find(u" ", star_index)
space_index = tmp_text.find(" ", star_index)
answer_char_count = tmp_text[star_index-1:star_index]
if answer_char_count.isnumeric():
answer_char_count = normalize_chinese_numeric(answer_char_count)
@ -1227,13 +1258,13 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
answer_char_count = '0'
star_index -= 1
offical_hint_string_anwser = u'A' * int(answer_char_count)
offical_hint_string_anwser = 'A' * int(answer_char_count)
offical_hint_string = tmp_text[star_index: space_index]
target_symbol = u"個半形英文小寫"
target_symbol = "個半形英文小寫"
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
space_index = tmp_text.find(u" ", star_index)
space_index = tmp_text.find(" ", star_index)
answer_char_count = tmp_text[star_index-1:star_index]
if answer_char_count.isnumeric():
answer_char_count = normalize_chinese_numeric(answer_char_count)
@ -1241,13 +1272,13 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
answer_char_count = '0'
star_index -= 1
offical_hint_string_anwser = u'a' * int(answer_char_count)
offical_hint_string_anwser = 'a' * int(answer_char_count)
offical_hint_string = tmp_text[star_index: space_index]
target_symbol = u"個英文小寫"
target_symbol = "個英文小寫"
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
space_index = tmp_text.find(u" ", star_index)
space_index = tmp_text.find(" ", star_index)
answer_char_count = tmp_text[star_index-1:star_index]
if answer_char_count.isnumeric():
answer_char_count = normalize_chinese_numeric(answer_char_count)
@ -1255,13 +1286,13 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
answer_char_count = '0'
star_index -= 1
offical_hint_string_anwser = u'a' * int(answer_char_count)
offical_hint_string_anwser = 'a' * int(answer_char_count)
offical_hint_string = tmp_text[star_index: space_index]
target_symbol = u"個英數半形字"
target_symbol = "個英數半形字"
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
space_index = tmp_text.find(u" ", star_index)
space_index = tmp_text.find(" ", star_index)
answer_char_count = tmp_text[star_index-1:star_index]
if answer_char_count.isnumeric():
answer_char_count = normalize_chinese_numeric(answer_char_count)
@ -1269,13 +1300,13 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
answer_char_count = '0'
star_index -= 1
my_anwser_formated = u'[A-Za-z\d]' * int(answer_char_count)
my_anwser_formated = '[A-Za-z\d]' * int(answer_char_count)
offical_hint_string = tmp_text[star_index: space_index]
target_symbol = u"個半形"
target_symbol = "個半形"
if target_symbol in tmp_text :
star_index = tmp_text.find(target_symbol)
space_index = tmp_text.find(u" ", star_index)
space_index = tmp_text.find(" ", star_index)
answer_char_count = tmp_text[star_index-1:star_index]
if answer_char_count.isnumeric():
answer_char_count = normalize_chinese_numeric(answer_char_count)
@ -1283,7 +1314,7 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
answer_char_count = '0'
star_index -= 1
my_anwser_formated = u'[A-Za-z\d]' * int(answer_char_count)
my_anwser_formated = '[A-Za-z\d]' * int(answer_char_count)
offical_hint_string = tmp_text[star_index: space_index]
if len(offical_hint_string) > 0:
@ -1293,16 +1324,16 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
my_options = tmp_text
if len(my_question) < len(tmp_text):
my_options = my_options.replace(my_question,u"")
my_options = my_options.replace(offical_hint_string,u"")
my_options = my_options.replace(my_question,"")
my_options = my_options.replace(offical_hint_string,"")
# try rule7:
# check is chinese/english in question, if match, apply my_options rule.
if len(offical_hint_string) > 0:
tmp_text_org = captcha_text_div_text
if CONST_EXAMPLE_SYMBOL in tmp_text:
tmp_text_org = tmp_text_org.replace(u'Ex:','ex:')
target_symbol = u"ex:"
tmp_text_org = tmp_text_org.replace('Ex:','ex:')
target_symbol = "ex:"
if target_symbol in tmp_text_org :
star_index = tmp_text_org.find(target_symbol)
my_options = tmp_text_org[star_index-1:]
@ -1324,7 +1355,7 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
my_answer_delimitor = maybe_delimitor
if show_debug_message:
print(u"my_answer_delimitor:", my_answer_delimitor)
print("my_answer_delimitor:", my_answer_delimitor)
# default remove quota
is_trim_quota = not check_answer_keep_symbol(tmp_text)
@ -1335,7 +1366,7 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
if len(my_anwser_formated) > 0:
new_pattern = my_anwser_formated
if len(my_answer_delimitor) > 0:
new_pattern = my_anwser_formated + u'\\' + my_answer_delimitor
new_pattern = my_anwser_formated + '\\' + my_answer_delimitor
return_list = re.findall(new_pattern, my_options)
if show_debug_message:
@ -1370,31 +1401,31 @@ def guess_answer_list_from_hint(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captch
def format_question_string(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captcha_text_div_text):
tmp_text = captcha_text_div_text
tmp_text = tmp_text.replace(u' ',u' ')
tmp_text = tmp_text.replace(u'',u':')
tmp_text = tmp_text.replace(' ',' ')
tmp_text = tmp_text.replace('',':')
# for hint
tmp_text = tmp_text.replace(u'*',u'*')
tmp_text = tmp_text.replace('*','*')
# stop word.
tmp_text = tmp_text.replace(u'輸入法',u'')
tmp_text = tmp_text.replace(u'請問',u'')
tmp_text = tmp_text.replace(u'請將',u'')
tmp_text = tmp_text.replace(u'請在',u'')
tmp_text = tmp_text.replace(u'請以',u'')
tmp_text = tmp_text.replace(u'請回答',u'')
tmp_text = tmp_text.replace(u'',u'')
tmp_text = tmp_text.replace('輸入法','')
tmp_text = tmp_text.replace('請問','')
tmp_text = tmp_text.replace('請將','')
tmp_text = tmp_text.replace('請在','')
tmp_text = tmp_text.replace('請以','')
tmp_text = tmp_text.replace('請回答','')
tmp_text = tmp_text.replace('','')
# replace ex.
tmp_text = tmp_text.replace(u'例如', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace(u'如:', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace(u'如為', CONST_EXAMPLE_SYMBOL+'')
tmp_text = tmp_text.replace('例如', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace('如:', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace('如為', CONST_EXAMPLE_SYMBOL+'')
tmp_text = tmp_text.replace(u'舉例', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace('舉例', CONST_EXAMPLE_SYMBOL)
if not CONST_EXAMPLE_SYMBOL in tmp_text:
tmp_text = tmp_text.replace(u'', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace('', CONST_EXAMPLE_SYMBOL)
# important, maybe 例 & ex occurs at same time.
tmp_text = tmp_text.replace(u'ex:', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace(u'Ex:', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace('ex:', CONST_EXAMPLE_SYMBOL)
tmp_text = tmp_text.replace('Ex:', CONST_EXAMPLE_SYMBOL)
#若你覺得
#PS:這個,可能會造成更多問題,呵呵。
@ -1406,14 +1437,14 @@ def format_question_string(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, captcha_tex
tmp_text = tmp_text.replace(symbol_if + '你答案', CONST_EXAMPLE_SYMBOL + '答案')
tmp_text = tmp_text.replace(symbol_if + '答案', CONST_EXAMPLE_SYMBOL + '答案')
tmp_text = tmp_text.replace(u'填入', CONST_INPUT_SYMBOL)
tmp_text = tmp_text.replace('填入', CONST_INPUT_SYMBOL)
#tmp_text = tmp_text.replace(u'[',u'(')
#tmp_text = tmp_text.replace(u']',u')')
tmp_text = tmp_text.replace(u'',u'?')
#tmp_text = tmp_text.replace('[','(')
#tmp_text = tmp_text.replace(']',')')
tmp_text = tmp_text.replace('','?')
tmp_text = tmp_text.replace(u'',u'(')
tmp_text = tmp_text.replace(u'',u')')
tmp_text = tmp_text.replace('','(')
tmp_text = tmp_text.replace('',')')
return tmp_text
@ -2542,21 +2573,21 @@ def guess_tixcraft_question(driver, question_text):
if len(question_text) > 0:
# format question text.
formated_html_text = question_text
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u'[',u'')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace('[','')
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u'',u'')
formated_html_text = formated_html_text.replace(u']',u'')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace('','')
formated_html_text = formated_html_text.replace(']','')
if u'' in formated_html_text and u'' in formated_html_text:
if '' in formated_html_text and '' in formated_html_text:
# PS: 這個太容易沖突,因為問題類型太多,不能直接使用。
#inferred_answer_string = find_between(formated_html_text, u"【", u"】")
#inferred_answer_string = find_between(formated_html_text, "【", "】")
pass
if show_debug_message:
@ -2567,9 +2598,9 @@ def guess_tixcraft_question(driver, question_text):
# 請輸入"YES",代表您已詳閱且瞭解並同意。
if inferred_answer_string is None:
if u'輸入"YES"' in formated_html_text:
if u'已詳閱' in formated_html_text or '請詳閱' in formated_html_text:
if u'同意' in formated_html_text:
if '輸入"YES"' in formated_html_text:
if '已詳閱' in formated_html_text or '請詳閱' in formated_html_text:
if '同意' in formated_html_text:
inferred_answer_string = 'YES'
# 購票前請詳閱注意事項,並於驗證碼欄位輸入【同意】繼續購票流程。
@ -3656,7 +3687,7 @@ def kktix_get_web_datetime(registrationsNewApp_div):
for guess_year in range(now.year,now.year+3):
current_year = str(guess_year)
if current_year in el_web_datetime_text:
if u'/' in el_web_datetime_text:
if '/' in el_web_datetime_text:
web_datetime = el_web_datetime_text
is_found_web_datetime = True
break
@ -3865,22 +3896,22 @@ def get_answer_string_from_web_date(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
is_need_parse_web_datetime = False
# '半形阿拉伯數字' & '半形數字'
if u'半形' in captcha_text_div_text and u'' in captcha_text_div_text:
if u'演出日期' in captcha_text_div_text:
if '半形' in captcha_text_div_text and '' in captcha_text_div_text:
if '演出日期' in captcha_text_div_text:
is_need_parse_web_datetime = True
if u'活動日期' in captcha_text_div_text:
if '活動日期' in captcha_text_div_text:
is_need_parse_web_datetime = True
if u'表演日期' in captcha_text_div_text:
if '表演日期' in captcha_text_div_text:
is_need_parse_web_datetime = True
if u'開始日期' in captcha_text_div_text:
if '開始日期' in captcha_text_div_text:
is_need_parse_web_datetime = True
if u'演唱會日期' in captcha_text_div_text:
if '演唱會日期' in captcha_text_div_text:
is_need_parse_web_datetime = True
if u'展覽日期' in captcha_text_div_text:
if '展覽日期' in captcha_text_div_text:
is_need_parse_web_datetime = True
if u'音樂會日期' in captcha_text_div_text:
if '音樂會日期' in captcha_text_div_text:
is_need_parse_web_datetime = True
if u'the date of the show you purchased' in captcha_text_div_text:
if 'the date of the show you purchased' in captcha_text_div_text:
is_need_parse_web_datetime = True
if show_debug_message:
@ -3900,7 +3931,7 @@ def get_answer_string_from_web_date(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
# MMDD
if my_datetime_foramted is None:
if u'4位半形' in captcha_text_formatted:
if '4位半形' in captcha_text_formatted:
my_datetime_foramted = "%m%d"
# for "如為2月30日請輸入0230"
@ -3914,9 +3945,9 @@ def get_answer_string_from_web_date(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
number_text = find_continuous_number(right_part)
my_anwser_formated = convert_string_to_pattern(number_text, dynamic_length=False)
if my_anwser_formated == u"[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]":
if my_anwser_formated == "[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]":
my_datetime_foramted = "%Y%m%d"
if my_anwser_formated == u"[\\d][\\d][\\d][\\d]":
if my_anwser_formated == "[\\d][\\d][\\d][\\d]":
my_datetime_foramted = "%m%d"
#print("my_datetime_foramted:", my_datetime_foramted)
@ -3938,16 +3969,16 @@ def get_answer_string_from_web_date(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
my_hint_anwser = my_hint_anwser[my_delimitor_index+len(my_delimitor_symbol):]
#print("my_hint_anwser:", my_hint_anwser)
# get before.
my_delimitor_symbol = u''
my_delimitor_symbol = ''
if my_delimitor_symbol in my_hint_anwser:
my_delimitor_index = my_hint_anwser.find(my_delimitor_symbol)
my_hint_anwser = my_hint_anwser[:my_delimitor_index]
my_delimitor_symbol = u''
my_delimitor_symbol = ''
if my_delimitor_symbol in my_hint_anwser:
my_delimitor_index = my_hint_anwser.find(my_delimitor_symbol)
my_hint_anwser = my_hint_anwser[:my_delimitor_index]
# PS: space may not is delimitor...
my_delimitor_symbol = u' '
my_delimitor_symbol = ' '
if my_delimitor_symbol in my_hint_anwser:
my_delimitor_index = my_hint_anwser.find(my_delimitor_symbol)
my_hint_anwser = my_hint_anwser[:my_delimitor_index]
@ -3958,9 +3989,9 @@ def get_answer_string_from_web_date(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
my_hint_anwser = my_hint_anwser[:-1]
my_anwser_formated = convert_string_to_pattern(my_hint_anwser, dynamic_length=False)
if my_anwser_formated == u"[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]":
if my_anwser_formated == "[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]":
my_datetime_foramted = "%Y%m%d"
if my_anwser_formated == u"[\\d][\\d][\\d][\\d]/[\\d][\\d]/[\\d][\\d]":
if my_anwser_formated == "[\\d][\\d][\\d][\\d]/[\\d][\\d]/[\\d][\\d]":
my_datetime_foramted = "%Y/%m/%d"
if show_debug_message:
@ -3973,7 +4004,7 @@ def get_answer_string_from_web_date(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
my_delimitor_symbol = ' '
if my_delimitor_symbol in web_datetime:
web_datetime = web_datetime[:web_datetime.find(my_delimitor_symbol)]
date_time = datetime.strptime(web_datetime,u"%Y/%m/%d")
date_time = datetime.strptime(web_datetime,"%Y/%m/%d")
if show_debug_message:
print("our web date_time:", date_time)
ans = None
@ -3996,20 +4027,20 @@ def get_answer_string_from_web_time(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
# parse '演出時間'
is_need_parse_web_time = False
if u'半形' in captcha_text_div_text:
if u'演出時間' in captcha_text_div_text:
if '半形' in captcha_text_div_text:
if '演出時間' in captcha_text_div_text:
is_need_parse_web_time = True
if u'表演時間' in captcha_text_div_text:
if '表演時間' in captcha_text_div_text:
is_need_parse_web_time = True
if u'開始時間' in captcha_text_div_text:
if '開始時間' in captcha_text_div_text:
is_need_parse_web_time = True
if u'演唱會時間' in captcha_text_div_text:
if '演唱會時間' in captcha_text_div_text:
is_need_parse_web_time = True
if u'展覽時間' in captcha_text_div_text:
if '展覽時間' in captcha_text_div_text:
is_need_parse_web_time = True
if u'音樂會時間' in captcha_text_div_text:
if '音樂會時間' in captcha_text_div_text:
is_need_parse_web_time = True
if u'the time of the show you purchased' in captcha_text_div_text:
if 'the time of the show you purchased' in captcha_text_div_text:
is_need_parse_web_time = True
#print("is_need_parse_web_time", is_need_parse_web_time)
@ -4031,39 +4062,39 @@ def get_answer_string_from_web_time(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
my_hint_anwser = my_hint_anwser[my_delimitor_index+len(my_delimitor_symbol):]
#print("my_hint_anwser:", my_hint_anwser)
# get before.
my_delimitor_symbol = u''
my_delimitor_symbol = ''
if my_delimitor_symbol in my_hint_anwser:
my_delimitor_index = my_hint_anwser.find(my_delimitor_symbol)
my_hint_anwser = my_hint_anwser[:my_delimitor_index]
my_delimitor_symbol = u''
my_delimitor_symbol = ''
if my_delimitor_symbol in my_hint_anwser:
my_delimitor_index = my_hint_anwser.find(my_delimitor_symbol)
my_hint_anwser = my_hint_anwser[:my_delimitor_index]
# PS: space may not is delimitor...
my_delimitor_symbol = u' '
my_delimitor_symbol = ' '
if my_delimitor_symbol in my_hint_anwser:
my_delimitor_index = my_hint_anwser.find(my_delimitor_symbol)
my_hint_anwser = my_hint_anwser[:my_delimitor_index]
my_anwser_formated = convert_string_to_pattern(my_hint_anwser, dynamic_length=False)
#print("my_hint_anwser:", my_hint_anwser)
#print(u"my_anwser_formated:", my_anwser_formated)
if my_anwser_formated == u"[\\d][\\d][\\d][\\d]":
#print("my_anwser_formated:", my_anwser_formated)
if my_anwser_formated == "[\\d][\\d][\\d][\\d]":
my_datetime_foramted = "%H%M"
if u'12小時' in tmp_text:
if '12小時' in tmp_text:
my_datetime_foramted = "%I%M"
if my_anwser_formated == u"[\\d][\\d]:[\\d][\\d]":
if my_anwser_formated == "[\\d][\\d]:[\\d][\\d]":
my_datetime_foramted = "%H:%M"
if u'12小時' in tmp_text:
if '12小時' in tmp_text:
my_datetime_foramted = "%I:%M"
if not my_datetime_foramted is None:
date_delimitor_symbol = u'('
date_delimitor_symbol = '('
if date_delimitor_symbol in web_datetime:
date_delimitor_symbol_index = web_datetime.find(date_delimitor_symbol)
if date_delimitor_symbol_index > 8:
web_datetime = web_datetime[:date_delimitor_symbol_index-1]
date_time = datetime.strptime(web_datetime,u"%Y/%m/%d %H:%M")
date_time = datetime.strptime(web_datetime,"%Y/%m/%d %H:%M")
#print("date_time:", date_time)
ans = None
try:
@ -4071,7 +4102,7 @@ def get_answer_string_from_web_time(CONST_EXAMPLE_SYMBOL, CONST_INPUT_SYMBOL, re
except Exception as exc:
pass
inferred_answer_string = ans
#print(u"my_anwser:", ans)
#print("my_anwser:", ans)
return inferred_answer_string
@ -4080,37 +4111,37 @@ def check_answer_keep_symbol(captcha_text_div_text):
# format text
keep_symbol_tmp = captcha_text_div_text
keep_symbol_tmp = keep_symbol_tmp.replace(u'',u'')
keep_symbol_tmp = keep_symbol_tmp.replace(u'必須',u'')
keep_symbol_tmp = keep_symbol_tmp.replace('','')
keep_symbol_tmp = keep_symbol_tmp.replace('必須','')
keep_symbol_tmp = keep_symbol_tmp.replace(u'全都',u'')
keep_symbol_tmp = keep_symbol_tmp.replace(u'全部都',u'')
keep_symbol_tmp = keep_symbol_tmp.replace('全都','')
keep_symbol_tmp = keep_symbol_tmp.replace('全部都','')
keep_symbol_tmp = keep_symbol_tmp.replace(u'一致',u'相同')
keep_symbol_tmp = keep_symbol_tmp.replace(u'一樣',u'相同')
keep_symbol_tmp = keep_symbol_tmp.replace(u'相等',u'相同')
keep_symbol_tmp = keep_symbol_tmp.replace('一致','相同')
keep_symbol_tmp = keep_symbol_tmp.replace('一樣','相同')
keep_symbol_tmp = keep_symbol_tmp.replace('相等','相同')
if u'符號須都相同' in keep_symbol_tmp:
if '符號須都相同' in keep_symbol_tmp:
is_need_keep_symbol = True
if u'符號都相同' in keep_symbol_tmp:
if '符號都相同' in keep_symbol_tmp:
is_need_keep_symbol = True
if u'符號須相同' in keep_symbol_tmp:
if '符號須相同' in keep_symbol_tmp:
is_need_keep_symbol = True
# for: 大小寫含括號需一模一樣
keep_symbol_tmp = keep_symbol_tmp.replace(u'', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'還有', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'以及', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'必須', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'而且', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'', '')
keep_symbol_tmp = keep_symbol_tmp.replace(u'一模', '')
keep_symbol_tmp = keep_symbol_tmp.replace('', '')
keep_symbol_tmp = keep_symbol_tmp.replace('', '')
keep_symbol_tmp = keep_symbol_tmp.replace('', '')
keep_symbol_tmp = keep_symbol_tmp.replace('還有', '')
keep_symbol_tmp = keep_symbol_tmp.replace('', '')
keep_symbol_tmp = keep_symbol_tmp.replace('以及', '')
keep_symbol_tmp = keep_symbol_tmp.replace('', '')
keep_symbol_tmp = keep_symbol_tmp.replace('必須', '')
keep_symbol_tmp = keep_symbol_tmp.replace('而且', '')
keep_symbol_tmp = keep_symbol_tmp.replace('', '')
keep_symbol_tmp = keep_symbol_tmp.replace('一模', '')
#print("keep_symbol_tmp:", keep_symbol_tmp)
if '大小寫括號相同' in keep_symbol_tmp:
is_need_keep_symbol = True
@ -4134,7 +4165,7 @@ def get_answer_list_from_question_string(registrationsNewApp_div, captcha_text_d
# 請回答下列問題,請在下方空格輸入DELIGHT請以半形輸入法作答大小寫需要一模一樣
if inferred_answer_string is None:
is_use_quota_message = False
if u"" in captcha_text_div_text and u"" in captcha_text_div_text:
if "" in captcha_text_div_text and "" in captcha_text_div_text:
# test for rule#1, it's seem very easy conflict...
match_quota_text_items = ["下方","空白","輸入","引號","文字"]
is_match_quota_text = True
@ -4145,19 +4176,19 @@ def get_answer_list_from_question_string(registrationsNewApp_div, captcha_text_d
is_use_quota_message = True
#print("is_use_quota_message:" , is_use_quota_message)
if is_use_quota_message:
inferred_answer_string = find_between(captcha_text_div_text, u"", u"")
inferred_answer_string = find_between(captcha_text_div_text, "", "")
#print("find captcha text:" , inferred_answer_string)
if inferred_answer_string is None:
is_use_quota_message = False
if u"" in captcha_text_div_text and u"" in captcha_text_div_text:
if u'' in captcha_text_div_text and u'' in captcha_text_div_text and CONST_INPUT_SYMBOL in captcha_text_div_text and u'引號' in captcha_text_div_text and u'' in captcha_text_div_text:
if "" in captcha_text_div_text and "" in captcha_text_div_text:
if '' in captcha_text_div_text and '' in captcha_text_div_text and CONST_INPUT_SYMBOL in captcha_text_div_text and '引號' in captcha_text_div_text and '' in captcha_text_div_text:
is_use_quota_message = True
if u'半形' in captcha_text_div_text and CONST_INPUT_SYMBOL in captcha_text_div_text and u'引號' in captcha_text_div_text and u'' in captcha_text_div_text:
if '半形' in captcha_text_div_text and CONST_INPUT_SYMBOL in captcha_text_div_text and '引號' in captcha_text_div_text and '' in captcha_text_div_text:
is_use_quota_message = True
#print("is_use_quota_message:" , is_use_quota_message)
if is_use_quota_message:
inferred_answer_string = find_between(captcha_text_div_text, u"", u"")
inferred_answer_string = find_between(captcha_text_div_text, "", "")
#print("find captcha text:" , inferred_answer_string)
# parse '演出日期'
@ -4170,35 +4201,35 @@ def get_answer_list_from_question_string(registrationsNewApp_div, captcha_text_d
# name of event.
if inferred_answer_string is None:
if u"name of event" in captcha_text_div_text:
if u'(' in captcha_text_div_text and u')' in captcha_text_div_text and u'ans:' in captcha_text_div_text.lower():
target_symbol = u"("
if "name of event" in captcha_text_div_text:
if '(' in captcha_text_div_text and ')' in captcha_text_div_text and 'ans:' in captcha_text_div_text.lower():
target_symbol = "("
star_index = captcha_text_div_text.find(target_symbol)
target_symbol = u":"
target_symbol = ":"
star_index = captcha_text_div_text.find(target_symbol, star_index)
target_symbol = u")"
target_symbol = ")"
end_index = captcha_text_div_text.find(target_symbol, star_index)
inferred_answer_string = captcha_text_div_text[star_index+1:end_index]
#print("inferred_answer_string:", inferred_answer_string)
# 二題式,組合問題。
is_combine_two_question = False
if u"第一題" in captcha_text_div_text and u"第二題" in captcha_text_div_text:
if "第一題" in captcha_text_div_text and "第二題" in captcha_text_div_text:
is_combine_two_question = True
if u"Q1." in captcha_text_div_text and u"Q2." in captcha_text_div_text:
if u"二題" in captcha_text_div_text:
if "Q1." in captcha_text_div_text and "Q2." in captcha_text_div_text:
if "二題" in captcha_text_div_text:
is_combine_two_question = True
if u"2題" in captcha_text_div_text:
if "2題" in captcha_text_div_text:
is_combine_two_question = True
if u"Q1:" in captcha_text_div_text and u"Q2:" in captcha_text_div_text:
if u"二題" in captcha_text_div_text:
if "Q1:" in captcha_text_div_text and "Q2:" in captcha_text_div_text:
if "二題" in captcha_text_div_text:
is_combine_two_question = True
if u"2題" in captcha_text_div_text:
if "2題" in captcha_text_div_text:
is_combine_two_question = True
if u"Q1 " in captcha_text_div_text and u"Q2 " in captcha_text_div_text:
if u"二題" in captcha_text_div_text:
if "Q1 " in captcha_text_div_text and "Q2 " in captcha_text_div_text:
if "二題" in captcha_text_div_text:
is_combine_two_question = True
if u"2題" in captcha_text_div_text:
if "2題" in captcha_text_div_text:
is_combine_two_question = True
if is_combine_two_question:
inferred_answer_string = None
@ -11363,9 +11394,9 @@ def cli():
def test_captcha_model():
# for testing kktix answers.
captcha_text_div_text = u"請回答下列問題,請在下方空格輸入DELIGHT請以半形輸入法作答大小寫需要一模一樣"
#captcha_text_div_text = u"請在下方空白處輸入引號內文字「abc」"
#captcha_text_div_text = u"請在下方空白處輸入引號內文字「0118eveconcert」請以半形小寫作答。"
captcha_text_div_text = "請回答下列問題,請在下方空格輸入DELIGHT請以半形輸入法作答大小寫需要一模一樣"
#captcha_text_div_text = "請在下方空白處輸入引號內文字「abc」"
#captcha_text_div_text = "請在下方空白處輸入引號內文字「0118eveconcert」請以半形小寫作答。"
#captcha_text_div_text = "在《DEEP AWAKENING見過深淵的人》專輯中哪一首為合唱曲目 【V6】深淵 、【Z5】浮木、【J8】無聲、【C1】以上皆非 (請以半形輸入法作答,大小寫/阿拉伯數字需要一模一樣範例A2"
#captcha_text_div_text = "Super Junior 的隊長是以下哪位? 【v】神童 【w】藝聲 【x】利特 【y】始源 若你覺得答案為 a請輸入 a (英文為半形小寫)"
#captcha_text_div_text = "請問XXX, 請以英文為半形小寫(例如a) a. 1月5日 b. 2月5日 c. 3月5日 d. 4月5日"
@ -11385,6 +11416,8 @@ def test_captcha_model():
#captcha_text_div_text = "1. 以下哪個為正確的OffGun粉絲名稱請以半形數字及細楷英文字母於下方輸入答案\n3fBaby\n6rBabii\n9eBabe"
#captcha_text_div_text = "2. 以下那齣並不是OffGun有份演出的劇集請以半形數字及細楷英文字母於下方輸入答案\n2m《我的貓貓男友》\n4v《愛情理論》\n6k《Not Me》"
#captcha_text_div_text = "2. 以下那齣並不是OffGun有份演出的劇集請以半形數字及細楷英文字母於下方輸入答案\n2m:《我的貓貓男友》\n4v:《愛情理論》\n6k:《Not Me》"
#captcha_text_div_text = "夏賢尚的官方粉絲名稱為? What is the name of Ha Hyun Sang's official fandom? 1. PET / 2. PAN / 3. PENCIL / 4. PEN (請填寫選項「純數字」/ Please only enter the number"
#captcha_text_div_text = "夏賢尚的官方粉絲名稱為? What is the name of Ha Hyun Sang's official fandom? A. PET / B. PAN / C. PENCIL / D. PEN (請填寫選項「純數字」/ Please only enter the number"
answer_list = get_answer_list_from_question_string(None, captcha_text_div_text)
print("answer_list:", answer_list)