def get_submissions(client, venue_id, status='all'):
    """Fetch the notes of a venue, optionally restricted to one status.

    :param client: API client exposing ``get_group(venue_id)`` and
        ``get_all_notes(...)``.
    :param venue_id: Id of the venue group to query.
    :param status: One of ``'all'``, ``'accepted'``, ``'under_review'``,
        ``'withdrawn'`` or ``'desk_rejected'``. Default is ``'all'``.
    :return: Whatever ``client.get_all_notes`` returns for the query.
    :raises ValueError: If ``status`` is not one of the valid options.
    :raises KeyError: If the venue group lacks the content field needed for
        the requested status.
    """
    # Name of the venue-group content field that holds the value for each
    # status. ``None`` marks 'accepted', which has no dedicated field and
    # uses the group's own id instead.
    status_fields = {
        "all": "submission_name",
        "accepted": None,
        "under_review": "submission_venue_id",
        "withdrawn": "withdrawn_venue_id",
        "desk_rejected": "desk_rejected_venue_id",
    }

    # Validate before talking to the API so a bad status costs no request.
    if status not in status_fields:
        raise ValueError(
            f"Invalid status: {status}. Valid options are: {list(status_fields)}"
        )

    # Retrieve the venue group information
    venue_group = client.get_group(venue_id)

    # Read only the field this status needs, so a venue that lacks an
    # unrelated field (e.g. no withdrawn id) can still be queried.
    field = status_fields[status]
    if field is None:
        target = venue_group.id
    else:
        target = venue_group.content[field]['value']

    if status == "all":
        # Return all submissions regardless of their status
        return client.get_all_notes(invitation=f'{venue_id}/-/{target}')

    # For all other statuses, filter on the content field 'venueid'
    return client.get_all_notes(content={'venueid': target})
{'id': 'k8KsI84Ds7', 'forum': 'k8KsI84Ds7', 'content': {'title': {'value': 'Process-Driven Autoformalization in Lean 4'}, 'keywords': {'value': ['Large Language Models', 'Autoformalization', 'Lean 4', 'Formal Math', 'Process Supervision', 'Formal Reasoning', 'Mathematical Reasoning', 'AI for Math', 'Automated Theorem Proving']}, 'abstract': {'value': 'Autoformalization, the conversion of natural language mathematics into formal languages, offers significant potential for advancing mathematical reasoning. However, existing efforts are limited to formal languages with substantial online corpora and struggle to keep pace with rapidly evolving languages like Lean 4. To bridge this gap, we propose a large-scale dataset \\textbf{Form}alization for \\textbf{L}ean~\\textbf{4} (\\textbf{\\dataset}) designed to comprehensively evaluate the autoformalization capabilities of large language models (LLMs), encompassing both statements and proofs in natural and formal languages. Additionally, we introduce the\n\\textbf{P}rocess-\\textbf{D}riven \\textbf{A}utoformalization (\\textbf{\\method}) framework\nthat leverages the precise feedback from Lean 4 compilers to enhance autoformalization. \nExtensive experiments demonstrate that \\method improves autoformalization, enabling higher compiler accuracy and human-evaluation scores using less filtered training data. 
\nMoreover, when fine-tuned with data containing detailed process information, \\method exhibits enhanced data utilization, resulting in more substantial improvements in autoformalization for Lean 4.'}, 'primary_area': {'value': 'neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)'}, 'code_of_ethics': {'value': 'I acknowledge that I and all co-authors of this work have read and commit to adhering to the ICLR Code of Ethics.'}, 'submission_guidelines': {'value': 'I certify that this submission complies with the submission instructions as described on https://iclr.cc/Conferences/2025/AuthorGuide.'}, 'reciprocal_reviewing': {'value': 'I understand the reciprocal reviewing requirement as described on https://iclr.cc/Conferences/2025/CallForPapers. If none of the authors are registered as a reviewer, it may result in a desk rejection at the discretion of the program chairs. To request an exception, please complete this form at https://forms.gle/Huojr6VjkFxiQsUp6.'}, 'anonymous_url': {'value': 'I certify that there is no URL (e.g., github page) that could be used to find authors’ identity.'}, 'no_acknowledgement_section': {'value': 'I certify that there is no acknowledgement section in this submission for double blind review.'}, 'venue': {'value': 'ICLR 2025 Conference Submission'}, 'venueid': {'value': 'ICLR.cc/2025/Conference/Submission'}, 'TLDR': {'value': 'We introduces the FormL4 benchmark to evaluate autoformalization in Lean 4, along with a process-supervised verifier that enhances the accuracy of LLMs in converting informal statements and proofs into formal ones.'}, 'supplementary_material': {'value': '/attachment/68692f2a42584afe8acaad5c9d6e8dbce0c04945.zip'}, 'pdf': {'value': '/pdf/0f8b696ac8eb6b5ad91e51aa8b3a61cc42b660d7.pdf'}, '_bibtex': {'value': '@inproceedings{\nanonymous2024processdriven,\ntitle={Process-Driven Autoformalization in Lean 4},\nauthor={Anonymous},\nbooktitle={Submitted to The Thirteenth International 
Conference on Learning Representations},\nyear={2024},\nurl={https://openreview.net/forum?id=k8KsI84Ds7},\nnote={under review}\n}'}}, 'invitations': ['ICLR.cc/2025/Conference/-/Submission', 'ICLR.cc/2025/Conference/-/Post_Submission', 'ICLR.cc/2025/Conference/Submission2128/-/Full_Submission'], 'cdate': 1726825565513, 'odate': 1728008565725, 'mdate': 1728790320313, 'signatures': ['ICLR.cc/2025/Conference/Submission2128/Authors'], 'writers': ['ICLR.cc/2025/Conference', 'ICLR.cc/2025/Conference/Submission2128/Authors'], 'readers': ['everyone'], 'license': 'CC BY 4.0'}
这里只有一部分信息是我们需要的。下面编写函数,提取其中的标题、摘要、关键词、主要领域、论文简述、论文链接和 PDF 链接:
def extract_submission_info(submission):
    """Extract the fields of interest from a submission.

    NOTE(review): only the timestamp helper below is present in this
    excerpt; the rest of the body (and therefore the return value) is
    missing and must be restored from the original source.

    :param submission: The submission to extract information from.
    """
    # Helper function to convert timestamps to datetime
    def convert_timestamp_to_date(timestamp):
        """Format an epoch timestamp in milliseconds as 'YYYY-MM-DD'.

        The conversion uses the local time zone. A falsy timestamp
        (``None`` or ``0``) yields ``None``.
        """
        return datetime.fromtimestamp(timestamp / 1000).strftime('%Y-%m-%d') if timestamp else None
def contains_text(submission: dict, target_text: str, fields: Union[str, List[str]] = ['title', 'abstract'], is_regex: bool = False) -> bool:
    """Return whether the target text occurs in any of the given fields.

    :param submission: Dictionary mapping field names to a string or a list
        of strings.
    :param target_text: Substring to look for, or a regular expression when
        ``is_regex`` is true.
    :param fields: A field name, a list of field names, or ``'all'`` for
        title, abstract, keywords, primary_area and TLDR. Default is
        ['title', 'abstract']; the default list is only read, never mutated.
    :param is_regex: Match with ``re.search`` instead of a plain substring
        test. Default is False.
    :return: True as soon as one field matches, otherwise False.
    """
    # If 'all', consider all available keys in the submission for matching
    if fields == 'all':
        fields = ['title', 'abstract', 'keywords', 'primary_area', 'TLDR']

    # Convert string input for fields into a list
    if isinstance(fields, str):
        fields = [fields]

    # Iterate over the specified fields
    for field in fields:
        content = submission.get(field, "")

        # A field stored as None is treated like a missing one instead of
        # raising TypeError in the checks below.
        if content is None:
            content = ""

        # Join lists into a single string (e.g., keywords)
        if isinstance(content, list):
            content = " ".join(content)

        # Check if the target_text is found in the content of the field
        if is_regex:
            if re.search(target_text, content):
                return True
        elif target_text in content:
            return True

    # If no matches were found, return False
    return False
信息检索
基于匹配函数,我们可以检索出符合条件的论文:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
def search_submissions(submissions: List[Dict], target_text: str, fields: Union[str, List[str]] = ['title', 'abstract'], is_regex: bool = False) -> List[Dict]:
    """
    Search through the list of submissions and return those that match the target text.

    :param submissions: List of submission dictionaries to search through.
    :param target_text: The text to search for in each submission.
    :param fields: The fields to search within for matching. Default is ['title', 'abstract'].
    :param is_regex: Boolean flag indicating whether to use regex for matching. Default is False.
    :return: List of submissions matching the target text.
    """
    # Keep the submissions that match, in their original order.
    return [
        submission
        for submission in submissions
        if contains_text(submission, target_text, fields, is_regex)
    ]