2 Star 0 Fork 0

Guojie Luo / engineering-village

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
MS.py 3.30 KB
一键复制 编辑 原始数据 按行查看 历史
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import requests
import json
import os
from multiprocessing import Pool, freeze_support
import sys
import codecs
import logging
def _to_last_first(name):
    """Return *name* as 'Last, First'.

    Names that already contain a comma, and empty/blank names, are returned
    unchanged. A single-word name yields 'Word, ' (empty first name).
    """
    if ',' in name:
        return name
    parts = name.split()
    if not parts:
        # Nothing to reorder; avoids an IndexError on parts[-1].
        return name
    return '%s, %s' % (parts[-1], ' '.join(parts[:-1]))


def get_authors_from_conference(abbr, year):
    """Return one row per (paper, author) for a conference edition.

    Queries the Microsoft Academic ``evaluate`` endpoint for papers whose
    conference name is *abbr* and whose year is *year*, paging through the
    results ``count`` entities at a time.

    Args:
        abbr: Conference abbreviation, substituted into ``C.CN='...'``.
        year: Publication year, substituted into ``Y=...``.

    Returns:
        A list of rows ``[abbr, year, title, pages, order, name, email,
        affiliation]``. ``pages`` and ``email`` are always empty strings.
        When a request fails or a response carries no ``entities``, a
        ``search error`` line is printed and the rows collected so far
        (an empty list if the first page failed) are returned.
    """
    count = 1000
    # NOTE(review): this subscription key is committed in source. It is kept
    # as the fallback for backward compatibility, but it should be rotated
    # and supplied through the environment instead.
    key = os.environ.get('MS_ACADEMIC_KEY',
                         '632667c894cd4187978598134d705272')
    url = 'https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate'
    headers = {
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': key,
    }
    payload = {
        'expr': "AND(Composite(C.CN='%s'),Y=%s)" % (abbr, year),
        'model': 'latest',
        'count': count,
        'offset': 0,
        'attributes': 'AA.S,AA.AuN,AA.AfN,C.CN,Ti,Y',
    }
    author_list = []
    # One session for all pages; the context manager closes it afterwards.
    with requests.Session() as session:
        while True:
            try:
                response = session.get(url, headers=headers, params=payload,
                                       timeout=30)
                result = response.json()
            except (requests.RequestException, ValueError) as exc:
                # Network failure or a non-JSON body: handle it like any
                # other failed search instead of aborting the whole run.
                logging.warning('request for %s %s failed: %s',
                                abbr, year, exc)
                result = None
            entities = (result.get('entities')
                        if isinstance(result, dict) else None)
            if entities is None:
                # Skip the failed search item.
                print('%s %s search error!' % (abbr, year))
                return author_list
            for paper in entities:
                title = paper.get('Ti', '')
                pages = ''
                for author in paper.get('AA', []):
                    order = str(author.get('S', ''))
                    email = ''
                    name = _to_last_first(author.get('AuN', '').title())
                    affiliation = author.get('AfN', '')
                    author_list.append([abbr, year, title, pages,
                                        order, name, email, affiliation])
            if len(entities) < count:
                # A short page means there is nothing left to fetch.
                return author_list
            # A full page may have been truncated: ask for the next one.
            payload['offset'] += count
def _quote_csv_field(field):
    """Return *field* ready for a CSV cell.

    The field is wrapped in double quotes when it contains a comma, a double
    quote or a line break; embedded double quotes are doubled. Other fields
    are returned unchanged.
    """
    if any(ch in field for ch in ',"\r\n'):
        return '"%s"' % field.replace('"', '""')
    return field


def main():
    """Crawl author lists from Microsoft Academic Graph.

    Reads the conference list named by ``sys.argv[1]`` (one
    ``<abbr>\\t<year>`` pair per line, UTF-8) and writes one
    ``results-cc/<abbr><year>.csv`` file per pair. Each CSV starts with a
    UTF-8 BOM so Excel detects the encoding, and its columns are:
    abbr, year, title, pages, order, author, email, affiliation.

    Blank lines are ignored; lines that do not hold exactly two
    tab-separated fields are reported on stderr and skipped.
    """
    if len(sys.argv) <= 1:
        print('Usage: MS.py <conference-list>', file=sys.stderr)
        return
    filename = sys.argv[1]
    save_path = 'results-cc/'
    os.makedirs(save_path, exist_ok=True)
    logging.basicConfig(level=logging.INFO)
    # 'utf-8-sig' also accepts plain UTF-8 and drops a leading BOM, which
    # would otherwise end up inside the first abbreviation.
    with open(filename, 'r', encoding='utf-8-sig') as conference_list:
        for line in conference_list:
            line = line.rstrip()
            if not line:
                continue
            fields = line.split('\t')
            if len(fields) != 2:
                print('[Warning] skipping malformed line: %r' % line,
                      file=sys.stderr)
                continue
            abbr, year = fields
            print(abbr, year)
            print('[Working] extract %s%s papers' % (abbr, year),
                  file=sys.stderr)
            author_list = get_authors_from_conference(abbr, year)
            if not author_list:
                print('[Warning] %s%s papers not found' % (abbr, year),
                      file=sys.stderr)
            # Output csv format:
            # abbr, year, title, pages, order, author, email, affiliation
            csv_filename = '%s%s.csv' % (abbr, year)
            content = '\n'.join(
                ','.join(_quote_csv_field(field) for field in author_info)
                for author_info in author_list)
            # Byte mode + BOM so Excel opens the file as UTF-8.
            with open(os.path.join(save_path, csv_filename), 'wb') as out:
                out.write(codecs.BOM_UTF8)
                out.write(content.encode('utf-8'))
            print('[Done] %s written' % csv_filename, file=sys.stderr)
            print('', file=sys.stderr)
# Run the crawler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    # Required by multiprocessing in frozen Windows executables;
    # it has no effect in a normal interpreter run.
    freeze_support()
    main()
1
https://gitee.com/lowerbound/engineering-village.git
git@gitee.com:lowerbound/engineering-village.git
lowerbound
engineering-village
engineering-village
master

搜索帮助