Selenium 爬 Codeforces
大约 3 分钟
Selenium 爬 Codeforces
1 前言
一天在补 codeforces 1593C,发现一个离谱的事情:Arrays.sort(int[]) 会 TLE,但 Arrays.sort(Object[]) 却能 AC !
我们知道 java 的 Arrays.sort(int[])
使用的是 java.util.DualPivotQuicksort
(快速排序变种) 而 Arrays.sort(Object[])
使用的是 java.util.ComparableTimSort
(归并排序变种)。两者的平均时间复杂度都是 O(nlogn)(但快速排序在最坏情况下会退化到 O(n²),而归并排序最坏情况下仍是 O(nlogn))。实际上 快速排序 的平均效率会比 归并排序 高一点(但 快速排序 不是稳定排序),而对于 java 基本类型而言,排序是否稳定并没有影响,所以 java 在基本类型排序上使用了快速排序。这让我很纠结:难道这组超时的用例在 快速排序 下的表现真的要差于 归并排序(这不是打了 jdk 作者的脸?)。
由于 codeforces 的输出最多只显示 512 个字符,我们无法直接获取到对应的完整用例;codeforces 官方不提供完整用例,是为了锻炼选手的 debug 能力。但我实在太想得到这组数据,因此写了个程序,利用 mapreduce 的思想去获取用例:每次提交只输出用例中的一个 500 字符片段,最后再把所有片段按顺序合并起来。
下载 chromedriver.exe
https://chromedriver.chromium.org/downloads
2 提交代码
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
# pip install selenium
# Start a Chrome session; `driver` is reused by the rest of the script.
driver = webdriver.Chrome()

# Log in: open the sign-in page, type the credentials, tick "remember me",
# then submit the form.
driver.get('https://codeforces.com/enter?back=%2F')
for field_id, text in (('handleOrEmail', 'userName'), ('password', 'password')):
    driver.find_element(By.ID, field_id).send_keys(text)
for locator in ((By.ID, 'remember'), (By.CLASS_NAME, 'submit')):
    driver.find_element(*locator).click()
# Fixed pause after submitting the login form before continuing.
sleep(3)
# Java source that gets submitted to Codeforces for every fragment.
# `beginIndex` / `endIndex` are textual placeholders: the submit loop replaces
# them with concrete offsets before sending the code. When the second input
# line starts with the hard-coded prefix below, the program prints that
# substring of the line and returns an empty answer; every other test is
# answered by the regular priority-queue solution.
template_code = '''
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.Comparator;
import java.util.PriorityQueue;
public class CF1593C {
public static void main(String[] args) throws IOException {
BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
int t = Integer.parseInt(reader.readLine());
for (int i = 0; i < t; i++) {
String line1 = reader.readLine();
String line2 = reader.readLine();
writer.write(solution(line1, line2).concat(System.lineSeparator()));
}
writer.close();
reader.close();
}
private static String solution(String line1, String line2) {
if (line2.startsWith("2 4 2 4 2 4 2 4 2 4 2 7 3 11 3 15 3 16 2 19 2 23 3 24 5 31 6 32 3 35 9 39 3 40 3 43 3 47 3 48 3 55 17 56 3 59 3 63 21 64 22 67 9 71")) {
System.out.println(line2.substring(beginIndex, endIndex)); // @
return "";
}
String[] line1s = line1.split(" ");
int n = Integer.parseInt(line1s[0]);
int k = Integer.parseInt(line1s[1]);
String[] line2s = line2.split(" ");
PriorityQueue<Integer> priorityQueue = new PriorityQueue<>(Comparator.reverseOrder());
for (int j = 0; j < k; j++) {
priorityQueue.add(Integer.parseInt(line2s[j]));
}
int sum = 0;
int cnt = 0;
for (int i = 0; i < k; i++) {
if (sum + n - priorityQueue.peek() < n) {
sum += n - priorityQueue.remove();
cnt++;
}
}
return String.valueOf(cnt);
}
}
'''
# Submit one copy of the template per 500-character window of the target line.
# Window i covers characters [i * 500, i * 500 + 500).
for i in range(1097, 2334):
    # submit code
    driver.get('https://codeforces.com/contest/1593/submit')
    # Problem
    submittedProblemIndex = Select(
        driver.find_element(By.NAME, 'submittedProblemIndex'))
    submittedProblemIndex.select_by_value('C')
    # Language
    programTypeId = Select(driver.find_element(By.NAME, 'programTypeId'))
    programTypeId.select_by_value('60')
    # Switch off editor
    toggleEditorCheckbox = driver.find_element(By.ID, 'toggleEditorCheckbox')
    # is_selected is a method: without the call parentheses the bound method
    # object is always truthy, so the checkbox would never be clicked.
    if not toggleEditorCheckbox.is_selected():
        toggleEditorCheckbox.click()
    # Fill the placeholders of the template with this window's offsets.
    begin = i * 500
    end = begin + 500
    code = template_code.replace('beginIndex', str(
        begin)).replace('endIndex', str(end))
    driver.find_element(By.ID, 'sourceCodeTextarea').send_keys(code)
    # You can submit no more than 20 times per 5 minutes
    # (300 s / 20 = 15 s per submission, so wait slightly longer than that).
    driver.find_element(By.CLASS_NAME, 'submit').click()
    sleep(16)
3 合并数据
package com.devyy.oys.codeforces;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.devyy.oys.codeforces.dao.CfMapper;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.time.Duration;
import java.util.List;
/**
 * Controller + ServiceImpl
 *
 * <p>Step 1 opens every stored submission whose fragment is still missing, reads the text of the
 * last "output" element on the submission page and stores its first {@value #FRAGMENT_LENGTH}
 * characters. Step 2 concatenates all stored fragments in "NO" order and logs the result.
 *
 * @since 2021-01-24
 */
@Slf4j
@RestController
@RequestMapping("/codeforces")
@Api(tags = "Codeforces 爬虫")
public class CfController {
    /** Location of the chromedriver binary handed to Selenium. */
    private static final String CHROME_DRIVER_PATH = "D:\\GITHUB\\LTS\\codeforces-spider\\chromedriver.exe";
    private static final String LOGIN_URL = "https://codeforces.com/enter?back=%2F";
    private static final String SUBMISSION_URL_PREFIX = "https://codeforces.com/contest/1593/submission/";
    /** Maximum number of characters kept from one submission's output. */
    private static final int FRAGMENT_LENGTH = 500;
    /**
     * If fewer "output" elements than this are present after the first wait, the page is polled
     * once more. NOTE(review): presumably tied to the index of the target test - confirm.
     */
    private static final int MIN_EXPECTED_OUTPUTS = 7;
    private static final long LOGIN_WAIT_MILLIS = 5000L;
    private static final long TESTS_WAIT_MILLIS = 3000L;

    @Value("${oys.codeforces.handleOrEmail}")
    private String handleOrEmail;
    @Value("${oys.codeforces.password}")
    private String password;
    @Autowired
    private CfMapper cfMapper;

    /**
     * Logs in to Codeforces and fills in the fragment of every record that has none yet.
     *
     * <p>A failure on a single submission is logged and skipped so the remaining ones are still
     * processed. The browser is always shut down, even when the method exits with an exception.
     *
     * @return the literal string {@code "success"}
     * @throws InterruptedException if the thread is interrupted during one of the fixed waits
     */
    @ApiOperation(value = "获取片段")
    @PostMapping("/step1")
    public String doGetFragment() throws InterruptedException {
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);
        WebDriver webDriver = new ChromeDriver();
        try {
            webDriver.manage().timeouts().implicitlyWait(Duration.ofSeconds(30));
            login(webDriver);
            List<CfDO> cfDOList = cfMapper.selectList(new QueryWrapper<CfDO>().select().isNull("FRAGMENT"));
            log.info("==>cfDOList size={}", cfDOList.size());
            for (CfDO cfDO : cfDOList) {
                String submissionId = cfDO.getSubmissionId();
                String url = SUBMISSION_URL_PREFIX + submissionId;
                log.info("==>url={}", url);
                try {
                    String fragment = readFragment(webDriver, url);
                    cfDO.setFragment(fragment);
                    log.info(fragment);
                    cfMapper.updateById(cfDO);
                    log.info("<==success submissionId={}", submissionId);
                } catch (InterruptedException e) {
                    // An interrupt is a request to stop, not a per-submission failure.
                    throw e;
                } catch (Exception e) {
                    // Best effort: keep going, but keep the cause in the log.
                    log.warn("<==failed submissionId={}", submissionId, e);
                }
            }
        } finally {
            // quit() ends the whole driver session; close() would only close the current window.
            webDriver.quit();
        }
        return "success";
    }

    /**
     * Concatenates the fragments of all records, ordered by "NO", and logs the merged text.
     *
     * <p>Records without a fragment are skipped with a warning instead of contributing the
     * literal text {@code "null"} to the merged result.
     *
     * @return the literal string {@code "success"}
     */
    @ApiOperation(value = "合并片段")
    @PostMapping("/step2")
    public String doMergeFragment() {
        List<CfDO> cfDOList = cfMapper.selectList(new QueryWrapper<CfDO>().select().orderByAsc("NO"));
        log.info("==>cfDOList size={}", cfDOList.size());
        StringBuilder stringBuilder = new StringBuilder();
        for (CfDO cfDO : cfDOList) {
            String fragment = cfDO.getFragment();
            if (fragment == null) {
                log.warn("==>missing fragment, submissionId={}", cfDO.getSubmissionId());
                continue;
            }
            stringBuilder.append(fragment);
        }
        log.info("==>doMergeFragment={}", stringBuilder);
        return "success";
    }

    /** Submits the login form with the configured credentials and waits a fixed time afterwards. */
    private void login(WebDriver webDriver) throws InterruptedException {
        webDriver.get(LOGIN_URL);
        webDriver.findElement(By.id("handleOrEmail")).sendKeys(handleOrEmail);
        webDriver.findElement(By.id("password")).sendKeys(password);
        webDriver.findElement(By.id("remember")).click();
        webDriver.findElement(By.className("submit")).click();
        Thread.sleep(LOGIN_WAIT_MILLIS);
    }

    /**
     * Opens a submission page, expands its tests and returns the start of the last output.
     *
     * @return at most {@value #FRAGMENT_LENGTH} leading characters of the last "output" element
     * @throws IllegalStateException if the page shows no "output" element at all
     */
    private String readFragment(WebDriver webDriver, String url) throws InterruptedException {
        webDriver.get(url);
        webDriver.findElement(By.className("click-to-view-tests")).click();
        Thread.sleep(TESTS_WAIT_MILLIS);
        List<WebElement> outputs = webDriver.findElements(By.className("output"));
        if (outputs.size() < MIN_EXPECTED_OUTPUTS) {
            // Give the page one more chance to finish rendering its tests.
            Thread.sleep(TESTS_WAIT_MILLIS);
            outputs = webDriver.findElements(By.className("output"));
        }
        if (outputs.isEmpty()) {
            throw new IllegalStateException("no output element found on " + url);
        }
        String text = outputs.get(outputs.size() - 1).getText();
        // Clamp the end index: an output shorter than FRAGMENT_LENGTH must not throw.
        return text.substring(0, Math.min(FRAGMENT_LENGTH, text.length()));
    }
}
4 最终结果
(全文完)