GloVe
import numpy as np
import gensim.downloader as api
from sklearn.metrics.pairwise import cosine_similarity


def _glove_mean_vector(keywords, vectors):
    """Return the mean embedding of ``keywords`` looked up in ``vectors``.

    The GloVe wiki-gigaword vectors are published uncased, so each keyword
    is lowercased before lookup; without this, capitalised keywords such as
    'Java' or 'SQL' are never found. Multi-word keywords (e.g.
    'Data Analysis') are split on whitespace and each word is looked up
    separately. Tokens missing from the vocabulary are skipped.

    Args:
        keywords: iterable of keyword strings.
        vectors: mapping-like word-vector store supporting ``in`` and
            ``[]`` lookups (here, the object returned by ``api.load``).

    Returns:
        A 1-D numpy array: the element-wise mean of all vectors found.

    Raises:
        ValueError: if none of the keywords has a vector. Averaging an
            empty list would otherwise yield NaN and fail later inside
            ``cosine_similarity`` with a less helpful message.
    """
    tokens = [token for keyword in keywords for token in keyword.lower().split()]
    found = [vectors[token] for token in tokens if token in vectors]
    if not found:
        raise ValueError(
            'None of the keywords were found in the embedding vocabulary: '
            + repr(list(keywords))
        )
    return np.mean(found, axis=0)


# Keyword lists extracted from the job description and from the resume.
job_description_keywords = ['Java', 'Python', 'SQL', 'AWS']
resume_keywords = ['Python', 'R', 'SQL', 'Data Analysis']

# Load the pretrained GloVe word vectors (downloaded on first use).
model = api.load("glove-wiki-gigaword-300")

# Turn each keyword list into a single vector by averaging its word vectors.
job_description_vector = _glove_mean_vector(job_description_keywords, model)
resume_vector = _glove_mean_vector(resume_keywords, model)

# Cosine similarity between the two averaged vectors.
similarity = cosine_similarity([job_description_vector], [resume_vector])[0][0]
print('The similarity between job description and resume is:', similarity)
One-hot
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def _one_hot(vocabulary, present):
    """Return a 0/1 list marking which entries of ``vocabulary`` are in ``present``."""
    return [int(term in present) for term in vocabulary]


# Keyword lists extracted from the job description and from the resume.
job_description_keywords = ['Java', 'Python', 'SQL', 'AWS']
resume_keywords = ['Python', 'R', 'SQL', 'Data Analysis']

# The shared vocabulary is every distinct keyword from either list. Its order
# is arbitrary, which is fine: both vectors are built over the same order.
all_keywords = list(set(job_description_keywords).union(resume_keywords))

# Encode each keyword list as a one-hot (presence/absence) vector.
job_description_vector = _one_hot(all_keywords, job_description_keywords)
resume_vector = _one_hot(all_keywords, resume_keywords)

# Cosine similarity between the two presence vectors.
pairwise = cosine_similarity([job_description_vector], [resume_vector])
similarity = pairwise[0][0]
print('The similarity between job description and resume is:', similarity)
Word2Vec
import numpy as np
import gensim.downloader as api
from sklearn.metrics.pairwise import cosine_similarity


def _word2vec_mean_vector(keywords, vectors):
    """Return the mean embedding of ``keywords`` looked up in ``vectors``.

    Each keyword is looked up exactly as written. A keyword that is not in
    the vocabulary (for example a multi-word phrase such as
    'Data Analysis') is split on whitespace and its individual words are
    looked up instead. Tokens that are still missing are skipped.

    Args:
        keywords: iterable of keyword strings.
        vectors: mapping-like word-vector store supporting ``in`` and
            ``[]`` lookups (here, the object returned by ``api.load``).

    Returns:
        A 1-D numpy array: the element-wise mean of all vectors found.

    Raises:
        ValueError: if none of the keywords has a vector. Averaging an
            empty list would otherwise yield NaN and fail later inside
            ``cosine_similarity`` with a less helpful message.
    """
    found = []
    for keyword in keywords:
        # Prefer the exact entry; otherwise fall back to its separate words.
        candidates = [keyword] if keyword in vectors else keyword.split()
        found.extend(vectors[token] for token in candidates if token in vectors)
    if not found:
        raise ValueError(
            'None of the keywords were found in the embedding vocabulary: '
            + repr(list(keywords))
        )
    return np.mean(found, axis=0)


# Keyword lists extracted from the job description and from the resume.
job_description_keywords = ['Java', 'Python', 'SQL', 'AWS']
resume_keywords = ['Python', 'R', 'SQL', 'Data Analysis']

# Load the pretrained Word2Vec vectors (downloaded on first use).
# api.load returns the word vectors themselves (a KeyedVectors object), not a
# full Word2Vec model, so they are indexed directly rather than through `.wv`,
# which KeyedVectors does not provide in gensim 4.
model = api.load("word2vec-google-news-300")

# Turn each keyword list into a single vector by averaging its word vectors.
job_description_vector = _word2vec_mean_vector(job_description_keywords, model)
resume_vector = _word2vec_mean_vector(resume_keywords, model)

# Cosine similarity between the two averaged vectors.
similarity = cosine_similarity([job_description_vector], [resume_vector])[0][0]
print('The similarity between job description and resume is:', similarity)